diff --git a/.gitignore b/.gitignore index 856505c..36920cd 100644 --- a/.gitignore +++ b/.gitignore @@ -50,3 +50,8 @@ _doc/ _bin/ .settings/ ImagePlay.VC.db + +# release-related dirs +IPL/release/ +IPL/debug/ +IPL/lib/ diff --git a/IPL/.qmake.stash b/IPL/.qmake.stash deleted file mode 100644 index 1b8387c..0000000 --- a/IPL/.qmake.stash +++ /dev/null @@ -1,67 +0,0 @@ -QMAKE_XCODE_DEVELOPER_PATH = /Applications/Xcode.app/Contents/Developer -QMAKE_XCODE_VERSION = 6.1.1 -QMAKE_MAC_SDK.macosx.path = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.10.sdk -QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_CC = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang -QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_CXX = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++ -QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_FIX_RPATH = \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/install_name_tool \ - -id -QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_AR = \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/ar \ - cq -QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_RANLIB = \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/ranlib \ - -s -QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_LINK = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++ -QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_LINK_SHLIB = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++ -QMAKE_MAC_SDK.macosx.platform_name = macosx -QMAKE_MAC_SDK.macosx.version = 10.10 -QMAKE_MAC_SDK.macosx.platform_path = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform -QMAKE_MAC_SDK.macosx10.11.path = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.11.sdk -QMAKE_MAC_SDK.macosx10.11.platform_path = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform -QMAKE_MAC_SDK.macosx10.11.version = 10.11 -QMAKE_MAC_SDK.macx-clang.macosx10.11.QMAKE_CC = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang -QMAKE_MAC_SDK.macx-clang.macosx10.11.QMAKE_CXX = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++ -QMAKE_MAC_SDK.macx-clang.macosx10.11.QMAKE_FIX_RPATH = \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/install_name_tool \ - -id -QMAKE_MAC_SDK.macx-clang.macosx10.11.QMAKE_AR = \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/ar \ - cq -QMAKE_MAC_SDK.macx-clang.macosx10.11.QMAKE_RANLIB = \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/ranlib \ - -s -QMAKE_MAC_SDK.macx-clang.macosx10.11.QMAKE_LINK = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++ -QMAKE_MAC_SDK.macx-clang.macosx10.11.QMAKE_LINK_SHLIB = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++ -QMAKE_MAC_SDK.macosx10.11.platform_name = macosx -QMAKE_MAC_SDK.macosx10.12.path = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.12.sdk -QMAKE_MAC_SDK.macosx10.12.platform_path = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform -QMAKE_MAC_SDK.macosx10.12.version = 10.12 -QMAKE_MAC_SDK.macx-clang.macosx10.12.QMAKE_CC = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang -QMAKE_MAC_SDK.macx-clang.macosx10.12.QMAKE_CXX = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++ -QMAKE_MAC_SDK.macx-clang.macosx10.12.QMAKE_FIX_RPATH = \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/install_name_tool \ - -id -QMAKE_MAC_SDK.macx-clang.macosx10.12.QMAKE_AR = \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/ar \ - cq -QMAKE_MAC_SDK.macx-clang.macosx10.12.QMAKE_RANLIB = \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/ranlib \ - -s -QMAKE_MAC_SDK.macx-clang.macosx10.12.QMAKE_LINK = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++ -QMAKE_MAC_SDK.macx-clang.macosx10.12.QMAKE_LINK_SHLIB = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++ -QMAKE_MAC_SDK.macosx10.12.platform_name = macosx -QMAKE_DEFAULT_INCDIRS = \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include/c++/v1 \ - /usr/local/include \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/8.0.0/include \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include \ - /usr/include \ - "/System/Library/Frameworks (framework directory)" \ - "/Library/Frameworks (framework directory)" -QMAKE_DEFAULT_LIBDIRS = \ - /lib \ - /usr/lib -QMAKE_MAC_SDK.macosx10.12.Path = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.12.sdk -QMAKE_MAC_SDK.macosx10.12.PlatformPath = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform -QMAKE_MAC_SDK.macosx10.12.SDKVersion = 10.12 diff --git a/IPL/IPL.pro b/IPL/IPL.pro index 0a656ba..e38c58b 100644 --- a/IPL/IPL.pro +++ b/IPL/IPL.pro @@ -1,166 +1,166 @@ -############################################################################# -# -# This file is part of ImagePlay. -# -# ImagePlay is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# ImagePlay is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with ImagePlay. If not, see . -# -############################################################################## - -CONFIG -= qt - -TARGET = IPL -CONFIG(debug, debug|release): DESTDIR = ../ImagePlay/debug -else: DESTDIR = ../ImagePlay/release - -#define platform variable for folder name -win32 {contains(QMAKE_TARGET.arch, x86_64) {PLATFORM = x64} else {PLATFORM = Win32}} -macx {PLATFORM = macx} -unix:!macx:!android {PLATFORM = linux} - -#define configuration variable for folder name -CONFIG(debug, debug|release) {CONFIGURATION = Debug} else {CONFIGURATION = Release} - -DESTDIR = ../_bin/$$CONFIGURATION/$$PLATFORM -OBJECTS_DIR = ../intermediate/$$TARGET/$$CONFIGURATION/$$PLATFORM -MOC_DIR = ../intermediate/$$TARGET/$$CONFIGURATION/$$PLATFORM -RCC_DIR = ../intermediate/$$TARGET/$$CONFIGURATION/$$PLATFORM -UI_DIR = ../intermediate/$$TARGET/$$CONFIGURATION/$$PLATFORM - -#TEMPLATE = vclib -TEMPLATE = lib -#CONFIG += lib_bundle - - -DEFINES += IPL_LIBRARY - -HEADERS += $$files(*.h,true) -SOURCES += $$files(*.cpp,true) -OTHER_FILES += $$files(*,true) - -#win32: LIBS += -L$$PWD/lib/FreeImage/ -lFreeImage - -#INCLUDEPATH += $$PWD/lib/FreeImage/ -#DEPENDPATH += $$PWD/lib/FreeImage/ - -#win32: PRE_TARGETDEPS += $$PWD/lib/FreeImage/FreeImage.lib - -win32 { - # dirent - INCLUDEPATH += $$PWD/lib/ - SOURCES += include/dirent/dirent.c - HEADERS += include/dirent/dirent.h - - # freeimage - LIBS += -L$$PWD/../_lib/freeimage/ -lFreeImage - INCLUDEPATH += $$PWD/../_lib/freeimage - DEPENDPATH += $$PWD/../_lib/freeimage - - # opencv - CONFIG(release, debug|release) { - LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_core310 - LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_imgproc310 - LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_highgui310 - LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_videoio310 - LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_calib3d310 - LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_optflow310 - LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_features2d310 - LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_xfeatures2d310 - LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_photo310 - LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_xphoto310 - } else { - LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_core310d - LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_imgproc310d - LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_highgui310d - LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_videoio310d - LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_calib3d310d - LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_optflow310d - LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_features2d310d - LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_xfeatures2d310d - LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_photo310d - LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_xphoto310d - } -} - - - -macx { - QMAKE_MAC_SDK = macosx10.12 - LIBS += -L$$PWD/../_lib/freeimage/ -lfreeimage-3.16.0 - - INCLUDEPATH += $$PWD/../_lib/freeimage - DEPENDPATH += $$PWD/../_lib/freeimage - - #DESTDIR = ../_bin/$$CONFIGURATION/$$PLATFORM/ImagePlay.app/Contents/Frameworks/ - DESTDIR = ../_lib/ - - LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_core.3.1.0 - LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_imgproc.3.1.0 - LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_highgui.3.1.0 - LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_videoio.3.1.0 - LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_calib3d.3.1.0 - LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_optflow.3.1.0 - LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_features2d.3.1.0 - LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_xfeatures2d.3.1.0 - LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_photo.3.1.0 - LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_xphoto.3.1.0 - -} - -linux { - CONFIG += staticlib - - LIBS += -lfreeimage - LIBS += -lopencv_core - LIBS += -lopencv_imgproc - LIBS += -lopencv_highgui - LIBS += -lopencv_videoio - LIBS += -lopencv_calib3d - LIBS += -lopencv_optflow - LIBS += -lopencv_features2d - LIBS += -lopencv_xfeatures2d - LIBS += -lopencv_photo - LIBS += -lopencv_xphoto - - DESTDIR = ../_bin/$$CONFIGURATION/$$PLATFORM/ -} - -msvc { - QMAKE_CXXFLAGS += -openmp - QMAKE_LFLAGS += -openmp - - #QMAKE_CXXFLAGS_RELEASE -= -O1 - #QMAKE_CXXFLAGS_RELEASE -= -O2 - #QMAKE_CXXFLAGS_RELEASE *= -O3 -} - -clang { - CONFIG +=c++11 - QMAKE_CXXFLAGS += -openmp - QMAKE_LFLAGS += -openmp -} - -gcc:!clang { - CONFIG +=c++11 - QMAKE_CXXFLAGS += -fopenmp - QMAKE_LFLAGS += -fopenmp - LIBS += -lgomp -} - - -# IPL -INCLUDEPATH += $$PWD/include/ -INCLUDEPATH += $$PWD/include/processes/ - -# OpenCV -INCLUDEPATH += $$PWD/include/opencv/ +############################################################################# +# +# This file is part of ImagePlay. +# +# ImagePlay is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# ImagePlay is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with ImagePlay. If not, see . +# +############################################################################## + +CONFIG -= qt + +TARGET = IPL +CONFIG(debug, debug|release): DESTDIR = ../ImagePlay/debug +else: DESTDIR = ../ImagePlay/release + +#define platform variable for folder name +win32 {contains(QMAKE_TARGET.arch, x86_64) {PLATFORM = x64} else {PLATFORM = Win32}} +macx {PLATFORM = macx} +unix:!macx:!android {PLATFORM = linux} + +#define configuration variable for folder name +CONFIG(debug, debug|release) {CONFIGURATION = Debug} else {CONFIGURATION = Release} + +DESTDIR = ../_bin/$$CONFIGURATION/$$PLATFORM +OBJECTS_DIR = ../intermediate/$$TARGET/$$CONFIGURATION/$$PLATFORM +MOC_DIR = ../intermediate/$$TARGET/$$CONFIGURATION/$$PLATFORM +RCC_DIR = ../intermediate/$$TARGET/$$CONFIGURATION/$$PLATFORM +UI_DIR = ../intermediate/$$TARGET/$$CONFIGURATION/$$PLATFORM + +#TEMPLATE = vclib +TEMPLATE = lib +#CONFIG += lib_bundle + + +DEFINES += IPL_LIBRARY + +HEADERS += $$files(*.h,true) +SOURCES += $$files(*.cpp,true) +OTHER_FILES += $$files(*,true) + +win32: LIBS += -L$$PWD/lib/FreeImage/ -lFreeImage + +INCLUDEPATH += $$PWD/lib/FreeImage/ +DEPENDPATH += $$PWD/lib/FreeImage/ + +win32: PRE_TARGETDEPS += $$PWD/lib/FreeImage/FreeImage.lib + +win32 { + # dirent + INCLUDEPATH += $$PWD/lib/ + SOURCES += include/dirent/dirent.c + HEADERS += include/dirent/dirent.h + + # freeimage + LIBS += -L$$PWD/../_lib/freeimage/ -lFreeImage + INCLUDEPATH += $$PWD/../_lib/freeimage + DEPENDPATH += $$PWD/../_lib/freeimage + + # opencv + CONFIG(release, debug|release) { + LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_core430 + LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_imgproc430 + LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_highgui430 + LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_videoio430 + LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_calib3d430 + LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_optflow430 + LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_features2d430 + LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_xfeatures2d430 + LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_photo430 + LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_xphoto430 + } else { + LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_core430d + LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_imgproc430d + LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_highgui430d + LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_videoio430d + LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_calib3d430d + LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_optflow430d + LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_features2d430d + LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_xfeatures2d430d + LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_photo430d + LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_xphoto430d + } +} + + + +macx { + QMAKE_MAC_SDK = macosx10.12 + LIBS += -L$$PWD/../_lib/freeimage/ -lfreeimage-3.16.0 + + INCLUDEPATH += $$PWD/../_lib/freeimage + DEPENDPATH += $$PWD/../_lib/freeimage + + #DESTDIR = ../_bin/$$CONFIGURATION/$$PLATFORM/ImagePlay.app/Contents/Frameworks/ + DESTDIR = ../_lib/ + + LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_core.3.1.0 + LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_imgproc.3.1.0 + LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_highgui.3.1.0 + LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_videoio.3.1.0 + LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_calib3d.3.1.0 + LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_optflow.3.1.0 + LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_features2d.3.1.0 + LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_xfeatures2d.3.1.0 + LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_photo.3.1.0 + LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_xphoto.3.1.0 + +} + +linux { + CONFIG += staticlib + + LIBS += -lfreeimage + LIBS += -lopencv_core + LIBS += -lopencv_imgproc + LIBS += -lopencv_highgui + LIBS += -lopencv_videoio + LIBS += -lopencv_calib3d + LIBS += -lopencv_optflow + LIBS += -lopencv_features2d + LIBS += -lopencv_xfeatures2d + LIBS += -lopencv_photo + LIBS += -lopencv_xphoto + + DESTDIR = ../_bin/$$CONFIGURATION/$$PLATFORM/ +} + +msvc { + QMAKE_CXXFLAGS += -openmp + QMAKE_LFLAGS += -openmp + + #QMAKE_CXXFLAGS_RELEASE -= -O1 + #QMAKE_CXXFLAGS_RELEASE -= -O2 + #QMAKE_CXXFLAGS_RELEASE *= -O3 +} + +clang { + CONFIG +=c++11 + QMAKE_CXXFLAGS += -openmp + QMAKE_LFLAGS += -openmp +} + +gcc:!clang { + CONFIG +=c++11 + QMAKE_CXXFLAGS += -fopenmp + QMAKE_LFLAGS += -fopenmp + LIBS += -lgomp +} + + +# IPL +INCLUDEPATH += $$PWD/include/ +INCLUDEPATH += $$PWD/include/processes/ + +# OpenCV +INCLUDEPATH += $$PWD/include/opencv/ diff --git a/IPL/include/opencv/opencv/cv.hpp b/IPL/include/opencv/opencv/cv.hpp deleted file mode 100644 index e498d7a..0000000 --- a/IPL/include/opencv/opencv/cv.hpp +++ /dev/null @@ -1,60 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_OLD_CV_HPP__ -#define __OPENCV_OLD_CV_HPP__ - -//#if defined(__GNUC__) -//#warning "This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module" -//#endif - -#include "cv.h" -#include "opencv2/core.hpp" -#include "opencv2/imgproc.hpp" -#include "opencv2/photo.hpp" -#include "opencv2/video.hpp" -#include "opencv2/highgui.hpp" -#include "opencv2/features2d.hpp" -#include "opencv2/calib3d.hpp" -#include "opencv2/objdetect.hpp" - -#endif diff --git a/IPL/include/opencv/opencv/cvaux.h b/IPL/include/opencv/opencv/cvaux.h deleted file mode 100644 index fe86c5d..0000000 --- a/IPL/include/opencv/opencv/cvaux.h +++ /dev/null @@ -1,57 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// Intel License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of Intel Corporation may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_OLD_AUX_H__ -#define __OPENCV_OLD_AUX_H__ - -//#if defined(__GNUC__) -//#warning "This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module" -//#endif - -#include "opencv2/core/core_c.h" -#include "opencv2/imgproc/imgproc_c.h" -#include "opencv2/photo/photo_c.h" -#include "opencv2/video/tracking_c.h" -#include "opencv2/objdetect/objdetect_c.h" - -#endif - -/* End of file. */ diff --git a/IPL/include/opencv/opencv/cvwimage.h b/IPL/include/opencv/opencv/cvwimage.h deleted file mode 100644 index de89c92..0000000 --- a/IPL/include/opencv/opencv/cvwimage.h +++ /dev/null @@ -1,46 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to -// this license. If you do not agree to this license, do not download, -// install, copy or use the software. -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2008, Google, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of Intel Corporation or contributors may not be used to endorse -// or promote products derived from this software without specific -// prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" -// and any express or implied warranties, including, but not limited to, the -// implied warranties of merchantability and fitness for a particular purpose -// are disclaimed. In no event shall the Intel Corporation or contributors be -// liable for any direct, indirect, incidental, special, exemplary, or -// consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. - - -#ifndef __OPENCV_OLD_WIMAGE_HPP__ -#define __OPENCV_OLD_WIMAGE_HPP__ - -#include "opencv2/core/wimage.hpp" - -#endif diff --git a/IPL/include/opencv/opencv/cxmisc.h b/IPL/include/opencv/opencv/cxmisc.h deleted file mode 100644 index 6c93a0c..0000000 --- a/IPL/include/opencv/opencv/cxmisc.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef __OPENCV_OLD_CXMISC_H__ -#define __OPENCV_OLD_CXMISC_H__ - -#ifdef __cplusplus -# include "opencv2/core/utility.hpp" -#endif - -#endif diff --git a/IPL/include/opencv/opencv/ml.h b/IPL/include/opencv/opencv/ml.h deleted file mode 100644 index d8e967f..0000000 --- a/IPL/include/opencv/opencv/ml.h +++ /dev/null @@ -1,47 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// Intel License Agreement -// -// Copyright (C) 2000, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of Intel Corporation may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_OLD_ML_H__ -#define __OPENCV_OLD_ML_H__ - -#include "opencv2/core/core_c.h" -#include "opencv2/ml.hpp" - -#endif diff --git a/IPL/include/opencv/opencv2/aruco.hpp b/IPL/include/opencv/opencv2/aruco.hpp index 3f45dc1..3cf62d2 100644 --- a/IPL/include/opencv/opencv2/aruco.hpp +++ b/IPL/include/opencv/opencv2/aruco.hpp @@ -49,14 +49,16 @@ the use of this software, even if advised of the possibility of such damage. * These markers are useful for easy, fast and robust camera pose estimation.ç * * The main functionalities are: - * - Detection of markers in a image + * - Detection of markers in an image * - Pose estimation from a single marker or from a board/set of markers * - Detection of ChArUco board for high subpixel accuracy * - Camera calibration from both, ArUco boards and ChArUco boards. * - Detection of ChArUco diamond markers * The samples directory includes easy examples of how to use the module. * - * The implementation is based on the ArUco Library by R. Muñoz-Salinas and S. Garrido-Jurado. + * The implementation is based on the ArUco Library by R. Muñoz-Salinas and S. Garrido-Jurado @cite Aruco2014. + * + * Markers can also be detected based on the AprilTag 2 @cite wang2016iros fiducial detection method. * * @sa S. Garrido-Jurado, R. Muñoz-Salinas, F. J. Madrid-Cuevas, and M. J. Marín-Jiménez. 2014. * "Automatic generation and detection of highly reliable fiducial markers under occlusion". @@ -76,7 +78,12 @@ namespace aruco { //! @addtogroup aruco //! @{ - +enum CornerRefineMethod{ + CORNER_REFINE_NONE, ///< Tag and corners detection based on the ArUco approach + CORNER_REFINE_SUBPIX, ///< ArUco approach and refine the corners locations using corner subpixel accuracy + CORNER_REFINE_CONTOUR, ///< ArUco approach and refine the corners locations using the contour-points line fitting + CORNER_REFINE_APRILTAG, ///< Tag and corners detection based on the AprilTag 2 approach @cite wang2016iros +}; /** * @brief Parameters for the detectMarker process: @@ -100,18 +107,20 @@ namespace aruco { * - minMarkerDistanceRate: minimum mean distance beetween two marker corners to be considered * similar, so that the smaller one is removed. The rate is relative to the smaller perimeter * of the two markers (default 0.05). - * - doCornerRefinement: do subpixel refinement or not + * - cornerRefinementMethod: corner refinement method. (CORNER_REFINE_NONE, no refinement. + * CORNER_REFINE_SUBPIX, do subpixel refinement. CORNER_REFINE_CONTOUR use contour-Points, + * CORNER_REFINE_APRILTAG use the AprilTag2 approach) * - cornerRefinementWinSize: window size for the corner refinement process (in pixels) (default 5). * - cornerRefinementMaxIterations: maximum number of iterations for stop criteria of the corner * refinement process (default 30). * - cornerRefinementMinAccuracy: minimum error for the stop cristeria of the corner refinement * process (default: 0.1) * - markerBorderBits: number of bits of the marker border, i.e. marker border width (default 1). - * - perpectiveRemovePixelPerCell: number of bits (per dimension) for each cell of the marker + * - perspectiveRemovePixelPerCell: number of bits (per dimension) for each cell of the marker * when removing the perspective (default 8). * - perspectiveRemoveIgnoredMarginPerCell: width of the margin of pixels on each cell not * considered for the determination of the cell bit. Represents the rate respect to the total - * size of the cell, i.e. perpectiveRemovePixelPerCell (default 0.13) + * size of the cell, i.e. perspectiveRemovePixelPerCell (default 0.13) * - maxErroneousBitsInBorderRate: maximum number of accepted erroneous bits in the border (i.e. * number of allowed white bits in the border). Represented as a rate respect to the total * number of bits per marker (default 0.35). @@ -120,6 +129,23 @@ namespace aruco { * than 128 or not) (default 5.0) * - errorCorrectionRate error correction rate respect to the maximun error correction capability * for each dictionary. (default 0.6). + * - aprilTagMinClusterPixels: reject quads containing too few pixels. + * - aprilTagMaxNmaxima: how many corner candidates to consider when segmenting a group of pixels into a quad. + * - aprilTagCriticalRad: Reject quads where pairs of edges have angles that are close to straight or close to + * 180 degrees. Zero means that no quads are rejected. (In radians). + * - aprilTagMaxLineFitMse: When fitting lines to the contours, what is the maximum mean squared error + * allowed? This is useful in rejecting contours that are far from being quad shaped; rejecting + * these quads "early" saves expensive decoding processing. + * - aprilTagMinWhiteBlackDiff: When we build our model of black & white pixels, we add an extra check that + * the white model must be (overall) brighter than the black model. How much brighter? (in pixel values, [0,255]). + * - aprilTagDeglitch: should the thresholded image be deglitched? Only useful for very noisy images + * - aprilTagQuadDecimate: Detection of quads can be done on a lower-resolution image, improving speed at a + * cost of pose accuracy and a slight decrease in detection rate. Decoding the binary payload is still + * done at full resolution. + * - aprilTagQuadSigma: What Gaussian blur should be applied to the segmented image (used for quad detection?) + * Parameter is the standard deviation in pixels. Very noisy images benefit from non-zero values (e.g. 0.8). + * - detectInvertedMarker: to check if there is a white marker. In order to generate a "white" marker just + * invert a normal marker by using a tilde, ~markerImage. (default false) */ struct CV_EXPORTS_W DetectorParameters { @@ -137,7 +163,7 @@ struct CV_EXPORTS_W DetectorParameters { CV_PROP_RW double minCornerDistanceRate; CV_PROP_RW int minDistanceToBorder; CV_PROP_RW double minMarkerDistanceRate; - CV_PROP_RW bool doCornerRefinement; + CV_PROP_RW int cornerRefinementMethod; CV_PROP_RW int cornerRefinementWinSize; CV_PROP_RW int cornerRefinementMaxIterations; CV_PROP_RW double cornerRefinementMinAccuracy; @@ -147,6 +173,21 @@ struct CV_EXPORTS_W DetectorParameters { CV_PROP_RW double maxErroneousBitsInBorderRate; CV_PROP_RW double minOtsuStdDev; CV_PROP_RW double errorCorrectionRate; + + // April :: User-configurable parameters. + CV_PROP_RW float aprilTagQuadDecimate; + CV_PROP_RW float aprilTagQuadSigma; + + // April :: Internal variables + CV_PROP_RW int aprilTagMinClusterPixels; + CV_PROP_RW int aprilTagMaxNmaxima; + CV_PROP_RW float aprilTagCriticalRad; + CV_PROP_RW float aprilTagMaxLineFitMse; + CV_PROP_RW int aprilTagMinWhiteBlackDiff; + CV_PROP_RW int aprilTagDeglitch; + + // to detect white (inverted) markers + CV_PROP_RW bool detectInvertedMarker; }; @@ -165,6 +206,10 @@ struct CV_EXPORTS_W DetectorParameters { * @param parameters marker detection parameters * @param rejectedImgPoints contains the imgPoints of those squares whose inner code has not a * correct codification. Useful for debugging purposes. + * @param cameraMatrix optional input 3x3 floating-point camera matrix + * \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ + * @param distCoeff optional vector of distortion coefficients + * \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements * * Performs marker detection in the input image. Only markers included in the specific dictionary * are searched. For each detected marker, it returns the 2D position of its corner in the image @@ -173,9 +218,9 @@ struct CV_EXPORTS_W DetectorParameters { * @sa estimatePoseSingleMarkers, estimatePoseBoard * */ -CV_EXPORTS_W void detectMarkers(InputArray image, Ptr &dictionary, OutputArrayOfArrays corners, +CV_EXPORTS_W void detectMarkers(InputArray image, const Ptr &dictionary, OutputArrayOfArrays corners, OutputArray ids, const Ptr ¶meters = DetectorParameters::create(), - OutputArrayOfArrays rejectedImgPoints = noArray()); + OutputArrayOfArrays rejectedImgPoints = noArray(), InputArray cameraMatrix= noArray(), InputArray distCoeff= noArray()); @@ -192,10 +237,11 @@ CV_EXPORTS_W void detectMarkers(InputArray image, Ptr &dictionary, O * \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ * @param distCoeffs vector of distortion coefficients * \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements - * @param rvecs array of output rotation vectors (@sa Rodrigues) (e.g. std::vector>). + * @param rvecs array of output rotation vectors (@sa Rodrigues) (e.g. std::vector). * Each element in rvecs corresponds to the specific marker in imgPoints. - * @param tvecs array of output translation vectors (e.g. std::vector>). + * @param tvecs array of output translation vectors (e.g. std::vector). * Each element in tvecs corresponds to the specific marker in imgPoints. + * @param _objPoints array of object points of all the marker corners * * This function receives the detected markers and returns their pose estimation respect to * the camera individually. So for each marker, one rotation and translation vector is returned. @@ -209,14 +255,14 @@ CV_EXPORTS_W void detectMarkers(InputArray image, Ptr &dictionary, O */ CV_EXPORTS_W void estimatePoseSingleMarkers(InputArrayOfArrays corners, float markerLength, InputArray cameraMatrix, InputArray distCoeffs, - OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs); + OutputArray rvecs, OutputArray tvecs, OutputArray _objPoints = noArray()); /** * @brief Board of markers * - * A board is a set of markers in the 3D space with a common cordinate system. + * A board is a set of markers in the 3D space with a common coordinate system. * The common form of a board of marker is a planar (2D) board, however any 3D layout can be used. * A Board object is composed by: * - The object points of the marker corners, i.e. their coordinates respect to the board system. @@ -226,23 +272,32 @@ CV_EXPORTS_W void estimatePoseSingleMarkers(InputArrayOfArrays corners, float ma class CV_EXPORTS_W Board { public: - // array of object points of all the marker corners in the board - // each marker include its 4 corners, i.e. for M markers, the size is Mx4 - std::vector< std::vector< Point3f > > objPoints; - - // the dictionary of markers employed for this board - Ptr dictionary; - - // vector of the identifiers of the markers in the board (same size than objPoints) - // The identifiers refers to the board dictionary - std::vector< int > ids; + /** + * @brief Provide way to create Board by passing necessary data. Specially needed in Python. + * + * @param objPoints array of object points of all the marker corners in the board + * @param dictionary the dictionary of markers employed for this board + * @param ids vector of the identifiers of the markers in the board + * + */ + CV_WRAP static Ptr create(InputArrayOfArrays objPoints, const Ptr &dictionary, InputArray ids); + /// array of object points of all the marker corners in the board + /// each marker include its 4 corners in CCW order. For M markers, the size is Mx4. + CV_PROP std::vector< std::vector< Point3f > > objPoints; + + /// the dictionary of markers employed for this board + CV_PROP Ptr dictionary; + + /// vector of the identifiers of the markers in the board (same size than objPoints) + /// The identifiers refers to the board dictionary + CV_PROP std::vector< int > ids; }; /** * @brief Planar board with grid arrangement of markers - * More common type of board. All markers are placed in the same plane in a grid arrangment. + * More common type of board. All markers are placed in the same plane in a grid arrangement. * The board can be drawn using drawPlanarBoard() function (@sa drawPlanarBoard) */ class CV_EXPORTS_W GridBoard : public Board { @@ -259,7 +314,7 @@ class CV_EXPORTS_W GridBoard : public Board { * * This function return the image of the GridBoard, ready to be printed. */ - void draw(Size outSize, OutputArray img, int marginSize = 0, int borderBits = 1); + CV_WRAP void draw(Size outSize, OutputArray img, int marginSize = 0, int borderBits = 1); /** @@ -277,29 +332,29 @@ class CV_EXPORTS_W GridBoard : public Board { * the marker size and marker separation. */ CV_WRAP static Ptr create(int markersX, int markersY, float markerLength, - float markerSeparation, Ptr &dictionary, int firstMarker = 0); + float markerSeparation, const Ptr &dictionary, int firstMarker = 0); /** * */ - Size getGridSize() const { return Size(_markersX, _markersY); } + CV_WRAP Size getGridSize() const { return Size(_markersX, _markersY); } /** * */ - float getMarkerLength() const { return _markerLength; } + CV_WRAP float getMarkerLength() const { return _markerLength; } /** * */ - float getMarkerSeparation() const { return _markerSeparation; } + CV_WRAP float getMarkerSeparation() const { return _markerSeparation; } private: // number of markers in X and Y directions int _markersX, _markersY; - // marker side lenght (normally in meters) + // marker side length (normally in meters) float _markerLength; // separation between markers in the grid @@ -322,8 +377,10 @@ class CV_EXPORTS_W GridBoard : public Board { * @param distCoeffs vector of distortion coefficients * \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements * @param rvec Output vector (e.g. cv::Mat) corresponding to the rotation vector of the board - * (@sa Rodrigues). + * (see cv::Rodrigues). Used as initial guess if not empty. * @param tvec Output vector (e.g. cv::Mat) corresponding to the translation vector of the board. + * @param useExtrinsicGuess defines whether initial guess for \b rvec and \b tvec will be used or not. + * Used as initial guess if not empty. * * This function receives the detected markers and returns the pose of a marker board composed * by those markers. @@ -334,9 +391,9 @@ class CV_EXPORTS_W GridBoard : public Board { * The function returns the number of markers from the input employed for the board pose estimation. * Note that returning a 0 means the pose has not been estimated. */ -CV_EXPORTS_W int estimatePoseBoard(InputArrayOfArrays corners, InputArray ids, Ptr &board, - InputArray cameraMatrix, InputArray distCoeffs, OutputArray rvec, - OutputArray tvec); +CV_EXPORTS_W int estimatePoseBoard(InputArrayOfArrays corners, InputArray ids, const Ptr &board, + InputArray cameraMatrix, InputArray distCoeffs, InputOutputArray rvec, + InputOutputArray tvec, bool useExtrinsicGuess = false); @@ -373,8 +430,8 @@ CV_EXPORTS_W int estimatePoseBoard(InputArrayOfArrays corners, InputArray ids, P * homography, and all the marker corners in the board must have the same Z coordinate. */ CV_EXPORTS_W void refineDetectedMarkers( - InputArray image, Ptr &board, InputOutputArrayOfArrays detectedCorners, - InputOutputArray detectedIds, InputOutputArray rejectedCorners, + InputArray image,const Ptr &board, InputOutputArrayOfArrays detectedCorners, + InputOutputArray detectedIds, InputOutputArrayOfArrays rejectedCorners, InputArray cameraMatrix = noArray(), InputArray distCoeffs = noArray(), float minRepDistance = 10.f, float errorCorrectionRate = 3.f, bool checkAllOrders = true, OutputArray recoveredIdxs = noArray(), const Ptr ¶meters = DetectorParameters::create()); @@ -419,6 +476,8 @@ CV_EXPORTS_W void drawDetectedMarkers(InputOutputArray image, InputArrayOfArrays * * Given the pose estimation of a marker or board, this function draws the axis of the world * coordinate system, i.e. the system centered on the marker/board. Useful for debugging purposes. + * + * @deprecated use cv::drawFrameAxes */ CV_EXPORTS_W void drawAxis(InputOutputArray image, InputArray cameraMatrix, InputArray distCoeffs, InputArray rvec, InputArray tvec, float length); @@ -437,7 +496,7 @@ CV_EXPORTS_W void drawAxis(InputOutputArray image, InputArray cameraMatrix, Inpu * * This function returns a marker image in its canonical form (i.e. ready to be printed) */ -CV_EXPORTS_W void drawMarker(Ptr &dictionary, int id, int sidePixels, OutputArray img, +CV_EXPORTS_W void drawMarker(const Ptr &dictionary, int id, int sidePixels, OutputArray img, int borderBits = 1); @@ -457,7 +516,7 @@ CV_EXPORTS_W void drawMarker(Ptr &dictionary, int id, int sidePixels * This function return the image of a planar board, ready to be printed. It assumes * the Board layout specified is planar by ignoring the z coordinates of the object points. */ -CV_EXPORTS_W void drawPlanarBoard(Ptr &board, Size outSize, OutputArray img, +CV_EXPORTS_W void drawPlanarBoard(const Ptr &board, Size outSize, OutputArray img, int marginSize = 0, int borderBits = 1); @@ -474,7 +533,7 @@ void _drawPlanarBoardImpl(Board *board, Size outSize, OutputArray img, * @brief Calibrate a camera using aruco markers * * @param corners vector of detected marker corners in all frames. - * The corners should have the same format returned by detectMarkers (@sa detectMarkers). + * The corners should have the same format returned by detectMarkers (see #detectMarkers). * @param ids list of identifiers for each marker in corners * @param counter number of markers in each frame so that corners and ids can be split * @param board Marker Board layout @@ -491,20 +550,52 @@ void _drawPlanarBoardImpl(Board *board, Size outSize, OutputArray img, * from the model coordinate space (in which object points are specified) to the world coordinate * space, that is, a real position of the board pattern in the k-th pattern view (k=0.. *M* -1). * @param tvecs Output vector of translation vectors estimated for each pattern view. - * @param flags flags Different flags for the calibration process (@sa calibrateCamera) + * @param stdDeviationsIntrinsics Output vector of standard deviations estimated for intrinsic parameters. + * Order of deviations values: + * \f$(f_x, f_y, c_x, c_y, k_1, k_2, p_1, p_2, k_3, k_4, k_5, k_6 , s_1, s_2, s_3, + * s_4, \tau_x, \tau_y)\f$ If one of parameters is not estimated, it's deviation is equals to zero. + * @param stdDeviationsExtrinsics Output vector of standard deviations estimated for extrinsic parameters. + * Order of deviations values: \f$(R_1, T_1, \dotsc , R_M, T_M)\f$ where M is number of pattern views, + * \f$R_i, T_i\f$ are concatenated 1x3 vectors. + * @param perViewErrors Output vector of average re-projection errors estimated for each pattern view. + * @param flags flags Different flags for the calibration process (see #calibrateCamera for details). * @param criteria Termination criteria for the iterative optimization algorithm. * * This function calibrates a camera using an Aruco Board. The function receives a list of * detected markers from several views of the Board. The process is similar to the chessboard * calibration in calibrateCamera(). The function returns the final re-projection error. */ -CV_EXPORTS_W double calibrateCameraAruco( - InputArrayOfArrays corners, InputArray ids, InputArray counter, Ptr &board, +CV_EXPORTS_AS(calibrateCameraArucoExtended) double calibrateCameraAruco( + InputArrayOfArrays corners, InputArray ids, InputArray counter, const Ptr &board, Size imageSize, InputOutputArray cameraMatrix, InputOutputArray distCoeffs, - OutputArrayOfArrays rvecs = noArray(), OutputArrayOfArrays tvecs = noArray(), int flags = 0, + OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs, + OutputArray stdDeviationsIntrinsics, OutputArray stdDeviationsExtrinsics, + OutputArray perViewErrors, int flags = 0, TermCriteria criteria = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON)); +/** @brief It's the same function as #calibrateCameraAruco but without calibration error estimation. + */ +CV_EXPORTS_W double calibrateCameraAruco( + InputArrayOfArrays corners, InputArray ids, InputArray counter, const Ptr &board, + Size imageSize, InputOutputArray cameraMatrix, InputOutputArray distCoeffs, + OutputArrayOfArrays rvecs = noArray(), OutputArrayOfArrays tvecs = noArray(), int flags = 0, + TermCriteria criteria = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON)); + + +/** + * @brief Given a board configuration and a set of detected markers, returns the corresponding + * image points and object points to call solvePnP + * + * @param board Marker board layout. + * @param detectedCorners List of detected marker corners of the board. + * @param detectedIds List of identifiers for each marker. + * @param objPoints Vector of vectors of board marker points in the board coordinate space. + * @param imgPoints Vector of vectors of the projections of board marker corner points. +*/ +CV_EXPORTS_W void getBoardObjectAndImagePoints(const Ptr &board, InputArrayOfArrays detectedCorners, + InputArray detectedIds, OutputArray objPoints, OutputArray imgPoints); + //! @} } diff --git a/IPL/include/opencv/opencv2/aruco/charuco.hpp b/IPL/include/opencv/opencv2/aruco/charuco.hpp index ff448ce..2e6ae62 100644 --- a/IPL/include/opencv/opencv2/aruco/charuco.hpp +++ b/IPL/include/opencv/opencv2/aruco/charuco.hpp @@ -63,11 +63,11 @@ class CV_EXPORTS_W CharucoBoard : public Board { public: // vector of chessboard 3D corners precalculated - std::vector< Point3f > chessboardCorners; + CV_PROP std::vector< Point3f > chessboardCorners; // for each charuco corner, nearest marker id and nearest marker corner id of each marker - std::vector< std::vector< int > > nearestMarkerIdx; - std::vector< std::vector< int > > nearestMarkerCorners; + CV_PROP std::vector< std::vector< int > > nearestMarkerIdx; + CV_PROP std::vector< std::vector< int > > nearestMarkerCorners; /** * @brief Draw a ChArUco board @@ -80,7 +80,7 @@ class CV_EXPORTS_W CharucoBoard : public Board { * * This function return the image of the ChArUco board, ready to be printed. */ - void draw(Size outSize, OutputArray img, int marginSize = 0, int borderBits = 1); + CV_WRAP void draw(Size outSize, OutputArray img, int marginSize = 0, int borderBits = 1); /** @@ -98,22 +98,22 @@ class CV_EXPORTS_W CharucoBoard : public Board { * and the size of the markers and chessboard squares. */ CV_WRAP static Ptr create(int squaresX, int squaresY, float squareLength, - float markerLength, Ptr &dictionary); + float markerLength, const Ptr &dictionary); /** * */ - Size getChessboardSize() const { return Size(_squaresX, _squaresY); } + CV_WRAP Size getChessboardSize() const { return Size(_squaresX, _squaresY); } /** * */ - float getSquareLength() const { return _squareLength; } + CV_WRAP float getSquareLength() const { return _squareLength; } /** * */ - float getMarkerLength() const { return _markerLength; } + CV_WRAP float getMarkerLength() const { return _markerLength; } private: void _getNearestMarkerCorners(); @@ -124,7 +124,7 @@ class CV_EXPORTS_W CharucoBoard : public Board { // size of chessboard squares side (normally in meters) float _squareLength; - // marker side lenght (normally in meters) + // marker side length (normally in meters) float _markerLength; }; @@ -146,6 +146,7 @@ class CV_EXPORTS_W CharucoBoard : public Board { * \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ * @param distCoeffs optional vector of distortion coefficients * \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements + * @param minMarkers number of adjacent markers that must be detected to return a charuco corner * * This function receives the detected markers and returns the 2D position of the chessboard corners * from a ChArUco board using the detected Aruco markers. If camera parameters are provided, @@ -155,10 +156,10 @@ class CV_EXPORTS_W CharucoBoard : public Board { * The function returns the number of interpolated corners. */ CV_EXPORTS_W int interpolateCornersCharuco(InputArrayOfArrays markerCorners, InputArray markerIds, - InputArray image, Ptr &board, + InputArray image, const Ptr &board, OutputArray charucoCorners, OutputArray charucoIds, InputArray cameraMatrix = noArray(), - InputArray distCoeffs = noArray()); + InputArray distCoeffs = noArray(), int minMarkers = 2); @@ -173,16 +174,18 @@ CV_EXPORTS_W int interpolateCornersCharuco(InputArrayOfArrays markerCorners, Inp * @param distCoeffs vector of distortion coefficients * \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements * @param rvec Output vector (e.g. cv::Mat) corresponding to the rotation vector of the board - * (@sa Rodrigues). + * (see cv::Rodrigues). * @param tvec Output vector (e.g. cv::Mat) corresponding to the translation vector of the board. + * @param useExtrinsicGuess defines whether initial guess for \b rvec and \b tvec will be used or not. * * This function estimates a Charuco board pose from some detected corners. * The function checks if the input corners are enough and valid to perform pose estimation. * If pose estimation is valid, returns true, else returns false. */ CV_EXPORTS_W bool estimatePoseCharucoBoard(InputArray charucoCorners, InputArray charucoIds, - Ptr &board, InputArray cameraMatrix, - InputArray distCoeffs, OutputArray rvec, OutputArray tvec); + const Ptr &board, InputArray cameraMatrix, + InputArray distCoeffs, InputOutputArray rvec, + InputOutputArray tvec, bool useExtrinsicGuess = false); @@ -223,19 +226,36 @@ CV_EXPORTS_W void drawDetectedCornersCharuco(InputOutputArray image, InputArray * from the model coordinate space (in which object points are specified) to the world coordinate * space, that is, a real position of the board pattern in the k-th pattern view (k=0.. *M* -1). * @param tvecs Output vector of translation vectors estimated for each pattern view. - * @param flags flags Different flags for the calibration process (@sa calibrateCamera) + * @param stdDeviationsIntrinsics Output vector of standard deviations estimated for intrinsic parameters. + * Order of deviations values: + * \f$(f_x, f_y, c_x, c_y, k_1, k_2, p_1, p_2, k_3, k_4, k_5, k_6 , s_1, s_2, s_3, + * s_4, \tau_x, \tau_y)\f$ If one of parameters is not estimated, it's deviation is equals to zero. + * @param stdDeviationsExtrinsics Output vector of standard deviations estimated for extrinsic parameters. + * Order of deviations values: \f$(R_1, T_1, \dotsc , R_M, T_M)\f$ where M is number of pattern views, + * \f$R_i, T_i\f$ are concatenated 1x3 vectors. + * @param perViewErrors Output vector of average re-projection errors estimated for each pattern view. + * @param flags flags Different flags for the calibration process (see #calibrateCamera for details). * @param criteria Termination criteria for the iterative optimization algorithm. * * This function calibrates a camera using a set of corners of a Charuco Board. The function * receives a list of detected corners and its identifiers from several views of the Board. * The function returns the final re-projection error. */ -CV_EXPORTS_W double calibrateCameraCharuco( - InputArrayOfArrays charucoCorners, InputArrayOfArrays charucoIds, Ptr &board, +CV_EXPORTS_AS(calibrateCameraCharucoExtended) double calibrateCameraCharuco( + InputArrayOfArrays charucoCorners, InputArrayOfArrays charucoIds, const Ptr &board, Size imageSize, InputOutputArray cameraMatrix, InputOutputArray distCoeffs, - OutputArrayOfArrays rvecs = noArray(), OutputArrayOfArrays tvecs = noArray(), int flags = 0, + OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs, + OutputArray stdDeviationsIntrinsics, OutputArray stdDeviationsExtrinsics, + OutputArray perViewErrors, int flags = 0, TermCriteria criteria = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON)); +/** @brief It's the same function as #calibrateCameraCharuco but without calibration error estimation. +*/ +CV_EXPORTS_W double calibrateCameraCharuco( + InputArrayOfArrays charucoCorners, InputArrayOfArrays charucoIds, const Ptr &board, + Size imageSize, InputOutputArray cameraMatrix, InputOutputArray distCoeffs, + OutputArrayOfArrays rvecs = noArray(), OutputArrayOfArrays tvecs = noArray(), int flags = 0, + TermCriteria criteria = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON)); @@ -309,7 +329,7 @@ CV_EXPORTS_W void drawDetectedDiamonds(InputOutputArray image, InputArrayOfArray * This function return the image of a ChArUco marker, ready to be printed. */ // TODO cannot be exported yet; conversion from/to Vec4i is not wrapped in core -CV_EXPORTS void drawCharucoDiamond(Ptr &dictionary, Vec4i ids, int squareLength, +CV_EXPORTS void drawCharucoDiamond(const Ptr &dictionary, Vec4i ids, int squareLength, int markerLength, OutputArray img, int marginSize = 0, int borderBits = 1); diff --git a/IPL/include/opencv/opencv2/aruco/dictionary.hpp b/IPL/include/opencv/opencv2/aruco/dictionary.hpp index 3ef1a82..27c7e5d 100644 --- a/IPL/include/opencv/opencv2/aruco/dictionary.hpp +++ b/IPL/include/opencv/opencv2/aruco/dictionary.hpp @@ -84,14 +84,14 @@ class CV_EXPORTS_W Dictionary { /** * @see generateCustomDictionary */ - CV_WRAP_AS(create) static Ptr create(int nMarkers, int markerSize); + CV_WRAP_AS(create) static Ptr create(int nMarkers, int markerSize, int randomSeed=0); /** * @see generateCustomDictionary */ CV_WRAP_AS(create_from) static Ptr create(int nMarkers, int markerSize, - Ptr &baseDictionary); + const Ptr &baseDictionary, int randomSeed=0); /** * @see getPredefinedDictionary @@ -114,19 +114,19 @@ class CV_EXPORTS_W Dictionary { /** * @brief Draw a canonical marker image */ - void drawMarker(int id, int sidePixels, OutputArray _img, int borderBits = 1) const; + CV_WRAP void drawMarker(int id, int sidePixels, OutputArray _img, int borderBits = 1) const; /** * @brief Transform matrix of bits to list of bytes in the 4 rotations */ - static Mat getByteListFromBits(const Mat &bits); + CV_WRAP static Mat getByteListFromBits(const Mat &bits); /** * @brief Transform list of bytes to matrix of bits */ - static Mat getBitsFromByteList(const Mat &byteList, int markerSize); + CV_WRAP static Mat getBitsFromByteList(const Mat &byteList, int markerSize); }; @@ -138,7 +138,7 @@ class CV_EXPORTS_W Dictionary { * - DICT_ARUCO_ORIGINAL: standard ArUco Library Markers. 1024 markers, 5x5 bits, 0 minimum distance */ -enum CV_EXPORTS_W_SIMPLE PREDEFINED_DICTIONARY_NAME { +enum PREDEFINED_DICTIONARY_NAME { DICT_4X4_50 = 0, DICT_4X4_100, DICT_4X4_250, @@ -155,7 +155,11 @@ enum CV_EXPORTS_W_SIMPLE PREDEFINED_DICTIONARY_NAME { DICT_7X7_100, DICT_7X7_250, DICT_7X7_1000, - DICT_ARUCO_ORIGINAL + DICT_ARUCO_ORIGINAL, + DICT_APRILTAG_16h5, ///< 4x4 bits, minimum hamming distance between any two codes = 5, 30 codes + DICT_APRILTAG_25h9, ///< 5x5 bits, minimum hamming distance between any two codes = 9, 35 codes + DICT_APRILTAG_36h10, ///< 6x6 bits, minimum hamming distance between any two codes = 10, 2320 codes + DICT_APRILTAG_36h11 ///< 6x6 bits, minimum hamming distance between any two codes = 11, 587 codes }; @@ -176,7 +180,8 @@ CV_EXPORTS_W Ptr getPredefinedDictionary(int dict); */ CV_EXPORTS_AS(custom_dictionary) Ptr generateCustomDictionary( int nMarkers, - int markerSize); + int markerSize, + int randomSeed=0); /** @@ -185,6 +190,7 @@ CV_EXPORTS_AS(custom_dictionary) Ptr generateCustomDictionary( * @param nMarkers number of markers in the dictionary * @param markerSize number of bits per dimension of each markers * @param baseDictionary Include the markers in this dictionary at the beginning (optional) + * @param randomSeed a user supplied seed for theRNG() * * This function creates a new dictionary composed by nMarkers markers and each markers composed * by markerSize x markerSize bits. If baseDictionary is provided, its markers are directly @@ -194,7 +200,8 @@ CV_EXPORTS_AS(custom_dictionary) Ptr generateCustomDictionary( CV_EXPORTS_AS(custom_dictionary_from) Ptr generateCustomDictionary( int nMarkers, int markerSize, - Ptr &baseDictionary); + const Ptr &baseDictionary, + int randomSeed=0); diff --git a/IPL/include/opencv/opencv2/bgsegm.hpp b/IPL/include/opencv/opencv2/bgsegm.hpp index 5a4ae3f..8ace5d9 100644 --- a/IPL/include/opencv/opencv2/bgsegm.hpp +++ b/IPL/include/opencv/opencv2/bgsegm.hpp @@ -183,7 +183,193 @@ class CV_EXPORTS_W BackgroundSubtractorGMG : public BackgroundSubtractor @param decisionThreshold Threshold value, above which it is marked foreground, else background. */ CV_EXPORTS_W Ptr createBackgroundSubtractorGMG(int initializationFrames=120, - double decisionThreshold=0.8); + double decisionThreshold=0.8); + +/** @brief Background subtraction based on counting. + + About as fast as MOG2 on a high end system. + More than twice faster than MOG2 on cheap hardware (benchmarked on Raspberry Pi3). + + %Algorithm by Sagi Zeevi ( https://github.com/sagi-z/BackgroundSubtractorCNT ) +*/ +class CV_EXPORTS_W BackgroundSubtractorCNT : public BackgroundSubtractor +{ +public: + // BackgroundSubtractor interface + CV_WRAP virtual void apply(InputArray image, OutputArray fgmask, double learningRate=-1) CV_OVERRIDE = 0; + CV_WRAP virtual void getBackgroundImage(OutputArray backgroundImage) const CV_OVERRIDE = 0; + + /** @brief Returns number of frames with same pixel color to consider stable. + */ + CV_WRAP virtual int getMinPixelStability() const = 0; + /** @brief Sets the number of frames with same pixel color to consider stable. + */ + CV_WRAP virtual void setMinPixelStability(int value) = 0; + + /** @brief Returns maximum allowed credit for a pixel in history. + */ + CV_WRAP virtual int getMaxPixelStability() const = 0; + /** @brief Sets the maximum allowed credit for a pixel in history. + */ + CV_WRAP virtual void setMaxPixelStability(int value) = 0; + + /** @brief Returns if we're giving a pixel credit for being stable for a long time. + */ + CV_WRAP virtual bool getUseHistory() const = 0; + /** @brief Sets if we're giving a pixel credit for being stable for a long time. + */ + CV_WRAP virtual void setUseHistory(bool value) = 0; + + /** @brief Returns if we're parallelizing the algorithm. + */ + CV_WRAP virtual bool getIsParallel() const = 0; + /** @brief Sets if we're parallelizing the algorithm. + */ + CV_WRAP virtual void setIsParallel(bool value) = 0; +}; + +/** @brief Creates a CNT Background Subtractor + +@param minPixelStability number of frames with same pixel color to consider stable +@param useHistory determines if we're giving a pixel credit for being stable for a long time +@param maxPixelStability maximum allowed credit for a pixel in history +@param isParallel determines if we're parallelizing the algorithm + */ + +CV_EXPORTS_W Ptr +createBackgroundSubtractorCNT(int minPixelStability = 15, + bool useHistory = true, + int maxPixelStability = 15*60, + bool isParallel = true); + +enum LSBPCameraMotionCompensation { + LSBP_CAMERA_MOTION_COMPENSATION_NONE = 0, + LSBP_CAMERA_MOTION_COMPENSATION_LK +}; + +/** @brief Implementation of the different yet better algorithm which is called GSOC, as it was implemented during GSOC and was not originated from any paper. + +This algorithm demonstrates better performance on CDNET 2014 dataset compared to other algorithms in OpenCV. + */ +class CV_EXPORTS_W BackgroundSubtractorGSOC : public BackgroundSubtractor +{ +public: + // BackgroundSubtractor interface + CV_WRAP virtual void apply(InputArray image, OutputArray fgmask, double learningRate=-1) CV_OVERRIDE = 0; + + CV_WRAP virtual void getBackgroundImage(OutputArray backgroundImage) const CV_OVERRIDE = 0; +}; + +/** @brief Background Subtraction using Local SVD Binary Pattern. More details about the algorithm can be found at @cite LGuo2016 + */ +class CV_EXPORTS_W BackgroundSubtractorLSBP : public BackgroundSubtractor +{ +public: + // BackgroundSubtractor interface + CV_WRAP virtual void apply(InputArray image, OutputArray fgmask, double learningRate=-1) CV_OVERRIDE = 0; + + CV_WRAP virtual void getBackgroundImage(OutputArray backgroundImage) const CV_OVERRIDE = 0; +}; + +/** @brief This is for calculation of the LSBP descriptors. + */ +class CV_EXPORTS_W BackgroundSubtractorLSBPDesc +{ +public: + static void calcLocalSVDValues(OutputArray localSVDValues, const Mat& frame); + + static void computeFromLocalSVDValues(OutputArray desc, const Mat& localSVDValues, const Point2i* LSBPSamplePoints); + + static void compute(OutputArray desc, const Mat& frame, const Point2i* LSBPSamplePoints); +}; + +/** @brief Creates an instance of BackgroundSubtractorGSOC algorithm. + +Implementation of the different yet better algorithm which is called GSOC, as it was implemented during GSOC and was not originated from any paper. + +@param mc Whether to use camera motion compensation. +@param nSamples Number of samples to maintain at each point of the frame. +@param replaceRate Probability of replacing the old sample - how fast the model will update itself. +@param propagationRate Probability of propagating to neighbors. +@param hitsThreshold How many positives the sample must get before it will be considered as a possible replacement. +@param alpha Scale coefficient for threshold. +@param beta Bias coefficient for threshold. +@param blinkingSupressionDecay Blinking supression decay factor. +@param blinkingSupressionMultiplier Blinking supression multiplier. +@param noiseRemovalThresholdFacBG Strength of the noise removal for background points. +@param noiseRemovalThresholdFacFG Strength of the noise removal for foreground points. + */ +CV_EXPORTS_W Ptr createBackgroundSubtractorGSOC(int mc = LSBP_CAMERA_MOTION_COMPENSATION_NONE, int nSamples = 20, float replaceRate = 0.003f, float propagationRate = 0.01f, int hitsThreshold = 32, float alpha = 0.01f, float beta = 0.0022f, float blinkingSupressionDecay = 0.1f, float blinkingSupressionMultiplier = 0.1f, float noiseRemovalThresholdFacBG = 0.0004f, float noiseRemovalThresholdFacFG = 0.0008f); + +/** @brief Creates an instance of BackgroundSubtractorLSBP algorithm. + +Background Subtraction using Local SVD Binary Pattern. More details about the algorithm can be found at @cite LGuo2016 + +@param mc Whether to use camera motion compensation. +@param nSamples Number of samples to maintain at each point of the frame. +@param LSBPRadius LSBP descriptor radius. +@param Tlower Lower bound for T-values. See @cite LGuo2016 for details. +@param Tupper Upper bound for T-values. See @cite LGuo2016 for details. +@param Tinc Increase step for T-values. See @cite LGuo2016 for details. +@param Tdec Decrease step for T-values. See @cite LGuo2016 for details. +@param Rscale Scale coefficient for threshold values. +@param Rincdec Increase/Decrease step for threshold values. +@param noiseRemovalThresholdFacBG Strength of the noise removal for background points. +@param noiseRemovalThresholdFacFG Strength of the noise removal for foreground points. +@param LSBPthreshold Threshold for LSBP binary string. +@param minCount Minimal number of matches for sample to be considered as foreground. + */ +CV_EXPORTS_W Ptr createBackgroundSubtractorLSBP(int mc = LSBP_CAMERA_MOTION_COMPENSATION_NONE, int nSamples = 20, int LSBPRadius = 16, float Tlower = 2.0f, float Tupper = 32.0f, float Tinc = 1.0f, float Tdec = 0.05f, float Rscale = 10.0f, float Rincdec = 0.005f, float noiseRemovalThresholdFacBG = 0.0004f, float noiseRemovalThresholdFacFG = 0.0008f, int LSBPthreshold = 8, int minCount = 2); + +/** @brief Synthetic frame sequence generator for testing background subtraction algorithms. + + It will generate the moving object on top of the background. + It will apply some distortion to the background to make the test more complex. + */ +class CV_EXPORTS_W SyntheticSequenceGenerator : public Algorithm +{ +private: + const double amplitude; + const double wavelength; + const double wavespeed; + const double objspeed; + unsigned timeStep; + Point2d pos; + Point2d dir; + Mat background; + Mat object; + RNG rng; + +public: + /** @brief Creates an instance of SyntheticSequenceGenerator. + + @param background Background image for object. + @param object Object image which will move slowly over the background. + @param amplitude Amplitude of wave distortion applied to background. + @param wavelength Length of waves in distortion applied to background. + @param wavespeed How fast waves will move. + @param objspeed How fast object will fly over background. + */ + CV_WRAP SyntheticSequenceGenerator(InputArray background, InputArray object, double amplitude, double wavelength, double wavespeed, double objspeed); + + /** @brief Obtain the next frame in the sequence. + + @param frame Output frame. + @param gtMask Output ground-truth (reference) segmentation mask object/background. + */ + CV_WRAP void getNextFrame(OutputArray frame, OutputArray gtMask); +}; + +/** @brief Creates an instance of SyntheticSequenceGenerator. + +@param background Background image for object. +@param object Object image which will move slowly over the background. +@param amplitude Amplitude of wave distortion applied to background. +@param wavelength Length of waves in distortion applied to background. +@param wavespeed How fast waves will move. +@param objspeed How fast object will fly over background. + */ +CV_EXPORTS_W Ptr createSyntheticSequenceGenerator(InputArray background, InputArray object, double amplitude = 2.0, double wavelength = 20.0, double wavespeed = 0.2, double objspeed = 6.0); //! @} diff --git a/IPL/include/opencv/opencv2/bioinspired/retina.hpp b/IPL/include/opencv/opencv2/bioinspired/retina.hpp index 4ed6f3a..91c8148 100644 --- a/IPL/include/opencv/opencv2/bioinspired/retina.hpp +++ b/IPL/include/opencv/opencv2/bioinspired/retina.hpp @@ -146,7 +146,7 @@ enum { @endcode */ - struct RetinaParameters{ + struct RetinaParameters{ //! Outer Plexiform Layer (OPL) and Inner Plexiform Layer Parvocellular (IplParvo) parameters struct OPLandIplParvoParameters{ OPLandIplParvoParameters():colorMode(true), @@ -208,7 +208,7 @@ class CV_EXPORTS_W Retina : public Algorithm { public: - + /** @brief Retreive retina input buffer size @return the retina input buffer size */ @@ -226,8 +226,9 @@ class CV_EXPORTS_W Retina : public Algorithm { - warning, Exceptions are thrown if read XML file is not valid @param retinaParameterFile the parameters filename @param applyDefaultSetupOnFailure set to true if an error must be thrown on error - You can retreive the current parameers structure using method Retina::getParameters and update - it before running method Retina::setup + + You can retrieve the current parameters structure using the method Retina::getParameters and update + it before running method Retina::setup. */ CV_WRAP virtual void setup(String retinaParameterFile="", const bool applyDefaultSetupOnFailure=true)=0; @@ -259,7 +260,7 @@ class CV_EXPORTS_W Retina : public Algorithm { CV_WRAP virtual void write( String fs ) const=0; /** @overload */ - virtual void write( FileStorage& fs ) const=0; + virtual void write( FileStorage& fs ) const CV_OVERRIDE = 0; /** @brief Setup the OPL and IPL parvo channels (see biologocal model) @@ -421,37 +422,30 @@ class CV_EXPORTS_W Retina : public Algorithm { Retina::getParvo methods */ CV_WRAP virtual void activateContoursProcessing(const bool activate)=0; -}; - -//! @relates bioinspired::Retina -//! @{ - -/** @overload */ -CV_EXPORTS_W Ptr createRetina(Size inputSize); -/** @brief Constructors from standardized interfaces : retreive a smart pointer to a Retina instance - -@param inputSize the input frame size -@param colorMode the chosen processing mode : with or without color processing -@param colorSamplingMethod specifies which kind of color sampling will be used : -- cv::bioinspired::RETINA_COLOR_RANDOM: each pixel position is either R, G or B in a random choice -- cv::bioinspired::RETINA_COLOR_DIAGONAL: color sampling is RGBRGBRGB..., line 2 BRGBRGBRG..., line 3, GBRGBRGBR... -- cv::bioinspired::RETINA_COLOR_BAYER: standard bayer sampling -@param useRetinaLogSampling activate retina log sampling, if true, the 2 following parameters can -be used -@param reductionFactor only usefull if param useRetinaLogSampling=true, specifies the reduction -factor of the output frame (as the center (fovea) is high resolution and corners can be -underscaled, then a reduction of the output is allowed without precision leak -@param samplingStrenght only usefull if param useRetinaLogSampling=true, specifies the strenght of -the log scale that is applied - */ -CV_EXPORTS_W Ptr createRetina(Size inputSize, const bool colorMode, int colorSamplingMethod=RETINA_COLOR_BAYER, const bool useRetinaLogSampling=false, const float reductionFactor=1.0f, const float samplingStrenght=10.0f); - -#ifdef HAVE_OPENCV_OCL -Ptr createRetina_OCL(Size inputSize); -Ptr createRetina_OCL(Size inputSize, const bool colorMode, int colorSamplingMethod=RETINA_COLOR_BAYER, const bool useRetinaLogSampling=false, const float reductionFactor=1.0f, const float samplingStrenght=10.0f); -#endif -//! @} + /** @overload */ + CV_WRAP static Ptr create(Size inputSize); + /** @brief Constructors from standardized interfaces : retreive a smart pointer to a Retina instance + + @param inputSize the input frame size + @param colorMode the chosen processing mode : with or without color processing + @param colorSamplingMethod specifies which kind of color sampling will be used : + - cv::bioinspired::RETINA_COLOR_RANDOM: each pixel position is either R, G or B in a random choice + - cv::bioinspired::RETINA_COLOR_DIAGONAL: color sampling is RGBRGBRGB..., line 2 BRGBRGBRG..., line 3, GBRGBRGBR... + - cv::bioinspired::RETINA_COLOR_BAYER: standard bayer sampling + @param useRetinaLogSampling activate retina log sampling, if true, the 2 following parameters can + be used + @param reductionFactor only usefull if param useRetinaLogSampling=true, specifies the reduction + factor of the output frame (as the center (fovea) is high resolution and corners can be + underscaled, then a reduction of the output is allowed without precision leak + @param samplingStrength only usefull if param useRetinaLogSampling=true, specifies the strength of + the log scale that is applied + */ + CV_WRAP static Ptr create(Size inputSize, const bool colorMode, + int colorSamplingMethod=RETINA_COLOR_BAYER, + const bool useRetinaLogSampling=false, + const float reductionFactor=1.0f, const float samplingStrength=10.0f); +}; //! @} diff --git a/IPL/include/opencv/opencv2/bioinspired/retinafasttonemapping.hpp b/IPL/include/opencv/opencv2/bioinspired/retinafasttonemapping.hpp index c65709d..ba1a872 100644 --- a/IPL/include/opencv/opencv2/bioinspired/retinafasttonemapping.hpp +++ b/IPL/include/opencv/opencv2/bioinspired/retinafasttonemapping.hpp @@ -126,10 +126,10 @@ class CV_EXPORTS_W RetinaFastToneMapping : public Algorithm (default is 1, see reference paper) */ CV_WRAP virtual void setup(const float photoreceptorsNeighborhoodRadius=3.f, const float ganglioncellsNeighborhoodRadius=1.f, const float meanLuminanceModulatorK=1.f)=0; + + CV_WRAP static Ptr create(Size inputSize); }; -//! @relates bioinspired::RetinaFastToneMapping -CV_EXPORTS_W Ptr createRetinaFastToneMapping(Size inputSize); //! @} diff --git a/IPL/include/opencv/opencv2/bioinspired/transientareassegmentationmodule.hpp b/IPL/include/opencv/opencv2/bioinspired/transientareassegmentationmodule.hpp index b11b61d..d5f5b2f 100644 --- a/IPL/include/opencv/opencv2/bioinspired/transientareassegmentationmodule.hpp +++ b/IPL/include/opencv/opencv2/bioinspired/transientareassegmentationmodule.hpp @@ -80,7 +80,7 @@ namespace bioinspired /** @brief parameter structure that stores the transient events detector setup parameters */ struct SegmentationParameters{ // CV_EXPORTS_W_MAP to export to python native dictionnaries - // default structure instance construction with default values + // default structure instance construction with default values SegmentationParameters(): thresholdON(100), thresholdOFF(100), @@ -171,7 +171,7 @@ class CV_EXPORTS_W TransientAreasSegmentationModule: public Algorithm /** @brief write xml/yml formated parameters information @param fs : a cv::Filestorage object ready to be filled */ - virtual void write( cv::FileStorage& fs ) const=0; + virtual void write( cv::FileStorage& fs ) const CV_OVERRIDE = 0; /** @brief main processing method, get result using methods getSegmentationPicture() @param inputToSegment : the image to process, it must match the instance buffer size ! @@ -180,20 +180,19 @@ class CV_EXPORTS_W TransientAreasSegmentationModule: public Algorithm CV_WRAP virtual void run(InputArray inputToSegment, const int channelIndex=0)=0; /** @brief access function - @return the last segmentation result: a boolean picture which is resampled between 0 and 255 for a display purpose - */ + return the last segmentation result: a boolean picture which is resampled between 0 and 255 for a display purpose + */ CV_WRAP virtual void getSegmentationPicture(OutputArray transientAreas)=0; /** @brief cleans all the buffers of the instance */ CV_WRAP virtual void clearAllBuffers()=0; -}; -/** @brief allocator -@param inputSize : size of the images input to segment (output will be the same size) -@relates bioinspired::TransientAreasSegmentationModule - */ -CV_EXPORTS_W Ptr createTransientAreasSegmentationModule(Size inputSize); + /** @brief allocator + @param inputSize : size of the images input to segment (output will be the same size) + */ + CV_WRAP static Ptr create(Size inputSize); +}; //! @} diff --git a/IPL/include/opencv/opencv2/calib3d.hpp b/IPL/include/opencv/opencv2/calib3d.hpp index e26e8c1..517c4cd 100644 --- a/IPL/include/opencv/opencv2/calib3d.hpp +++ b/IPL/include/opencv/opencv2/calib3d.hpp @@ -41,8 +41,8 @@ // //M*/ -#ifndef __OPENCV_CALIB3D_HPP__ -#define __OPENCV_CALIB3D_HPP__ +#ifndef OPENCV_CALIB3D_HPP +#define OPENCV_CALIB3D_HPP #include "opencv2/core.hpp" #include "opencv2/features2d.hpp" @@ -51,85 +51,254 @@ /** @defgroup calib3d Camera Calibration and 3D Reconstruction -The functions in this section use a so-called pinhole camera model. In this model, a scene view is -formed by projecting 3D points into the image plane using a perspective transformation. +The functions in this section use a so-called pinhole camera model. The view of a scene +is obtained by projecting a scene's 3D point \f$P_w\f$ into the image plane using a perspective +transformation which forms the corresponding pixel \f$p\f$. Both \f$P_w\f$ and \f$p\f$ are +represented in homogeneous coordinates, i.e. as 3D and 2D homogeneous vector respectively. You will +find a brief introduction to projective geometry, homogeneous vectors and homogeneous +transformations at the end of this section's introduction. For more succinct notation, we often drop +the 'homogeneous' and say vector instead of homogeneous vector. -\f[s \; m' = A [R|t] M'\f] +The distortion-free projective transformation given by a pinhole camera model is shown below. -or +\f[s \; p = A \begin{bmatrix} R|t \end{bmatrix} P_w,\f] -\f[s \vecthree{u}{v}{1} = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1} +where \f$P_w\f$ is a 3D point expressed with respect to the world coordinate system, +\f$p\f$ is a 2D pixel in the image plane, \f$A\f$ is the intrinsic camera matrix, +\f$R\f$ and \f$t\f$ are the rotation and translation that describe the change of coordinates from +world to camera coordinate systems (or camera frame) and \f$s\f$ is the projective transformation's +arbitrary scaling and not part of the camera model. + +The intrinsic camera matrix \f$A\f$ (notation used as in @cite Zhang2000 and also generally notated +as \f$K\f$) projects 3D points given in the camera coordinate system to 2D pixel coordinates, i.e. + +\f[p = A P_c.\f] + +The camera matrix \f$A\f$ is composed of the focal lengths \f$f_x\f$ and \f$f_y\f$, which are +expressed in pixel units, and the principal point \f$(c_x, c_y)\f$, that is usually close to the +image center: + +\f[A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1},\f] + +and thus + +\f[s \vecthree{u}{v}{1} = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1} \vecthree{X_c}{Y_c}{Z_c}.\f] + +The matrix of intrinsic parameters does not depend on the scene viewed. So, once estimated, it can +be re-used as long as the focal length is fixed (in case of a zoom lens). Thus, if an image from the +camera is scaled by a factor, all of these parameters need to be scaled (multiplied/divided, +respectively) by the same factor. + +The joint rotation-translation matrix \f$[R|t]\f$ is the matrix product of a projective +transformation and a homogeneous transformation. The 3-by-4 projective transformation maps 3D points +represented in camera coordinates to 2D poins in the image plane and represented in normalized +camera coordinates \f$x' = X_c / Z_c\f$ and \f$y' = Y_c / Z_c\f$: + +\f[Z_c \begin{bmatrix} +x' \\ +y' \\ +1 +\end{bmatrix} = \begin{bmatrix} +1 & 0 & 0 & 0 \\ +0 & 1 & 0 & 0 \\ +0 & 0 & 1 & 0 +\end{bmatrix} \begin{bmatrix} -r_{11} & r_{12} & r_{13} & t_1 \\ -r_{21} & r_{22} & r_{23} & t_2 \\ -r_{31} & r_{32} & r_{33} & t_3 +X_c \\ +Y_c \\ +Z_c \\ +1 +\end{bmatrix}.\f] + +The homogeneous transformation is encoded by the extrinsic parameters \f$R\f$ and \f$t\f$ and +represents the change of basis from world coordinate system \f$w\f$ to the camera coordinate sytem +\f$c\f$. Thus, given the representation of the point \f$P\f$ in world coordinates, \f$P_w\f$, we +obtain \f$P\f$'s representation in the camera coordinate system, \f$P_c\f$, by + +\f[P_c = \begin{bmatrix} +R & t \\ +0 & 1 +\end{bmatrix} P_w,\f] + +This homogeneous transformation is composed out of \f$R\f$, a 3-by-3 rotation matrix, and \f$t\f$, a +3-by-1 translation vector: + +\f[\begin{bmatrix} +R & t \\ +0 & 1 +\end{bmatrix} = \begin{bmatrix} +r_{11} & r_{12} & r_{13} & t_x \\ +r_{21} & r_{22} & r_{23} & t_y \\ +r_{31} & r_{32} & r_{33} & t_z \\ +0 & 0 & 0 & 1 +\end{bmatrix}, +\f] + +and therefore + +\f[\begin{bmatrix} +X_c \\ +Y_c \\ +Z_c \\ +1 +\end{bmatrix} = \begin{bmatrix} +r_{11} & r_{12} & r_{13} & t_x \\ +r_{21} & r_{22} & r_{23} & t_y \\ +r_{31} & r_{32} & r_{33} & t_z \\ +0 & 0 & 0 & 1 \end{bmatrix} \begin{bmatrix} -X \\ -Y \\ -Z \\ +X_w \\ +Y_w \\ +Z_w \\ +1 +\end{bmatrix}.\f] + +Combining the projective transformation and the homogeneous transformation, we obtain the projective +transformation that maps 3D points in world coordinates into 2D points in the image plane and in +normalized camera coordinates: + +\f[Z_c \begin{bmatrix} +x' \\ +y' \\ +1 +\end{bmatrix} = \begin{bmatrix} R|t \end{bmatrix} \begin{bmatrix} +X_w \\ +Y_w \\ +Z_w \\ +1 +\end{bmatrix} = \begin{bmatrix} +r_{11} & r_{12} & r_{13} & t_x \\ +r_{21} & r_{22} & r_{23} & t_y \\ +r_{31} & r_{32} & r_{33} & t_z +\end{bmatrix} +\begin{bmatrix} +X_w \\ +Y_w \\ +Z_w \\ +1 +\end{bmatrix},\f] + +with \f$x' = X_c / Z_c\f$ and \f$y' = Y_c / Z_c\f$. Putting the equations for instrincs and extrinsics together, we can write out +\f$s \; p = A \begin{bmatrix} R|t \end{bmatrix} P_w\f$ as + +\f[s \vecthree{u}{v}{1} = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1} +\begin{bmatrix} +r_{11} & r_{12} & r_{13} & t_x \\ +r_{21} & r_{22} & r_{23} & t_y \\ +r_{31} & r_{32} & r_{33} & t_z +\end{bmatrix} +\begin{bmatrix} +X_w \\ +Y_w \\ +Z_w \\ 1 +\end{bmatrix}.\f] + +If \f$Z_c \ne 0\f$, the transformation above is equivalent to the following, + +\f[\begin{bmatrix} +u \\ +v +\end{bmatrix} = \begin{bmatrix} +f_x X_c/Z_c + c_x \\ +f_y Y_c/Z_c + c_y \end{bmatrix}\f] -where: - -- \f$(X, Y, Z)\f$ are the coordinates of a 3D point in the world coordinate space -- \f$(u, v)\f$ are the coordinates of the projection point in pixels -- \f$A\f$ is a camera matrix, or a matrix of intrinsic parameters -- \f$(cx, cy)\f$ is a principal point that is usually at the image center -- \f$fx, fy\f$ are the focal lengths expressed in pixel units. - -Thus, if an image from the camera is scaled by a factor, all of these parameters should be scaled -(multiplied/divided, respectively) by the same factor. The matrix of intrinsic parameters does not -depend on the scene viewed. So, once estimated, it can be re-used as long as the focal length is -fixed (in case of zoom lens). The joint rotation-translation matrix \f$[R|t]\f$ is called a matrix of -extrinsic parameters. It is used to describe the camera motion around a static scene, or vice versa, -rigid motion of an object in front of a still camera. That is, \f$[R|t]\f$ translates coordinates of a -point \f$(X, Y, Z)\f$ to a coordinate system, fixed with respect to the camera. The transformation above -is equivalent to the following (when \f$z \ne 0\f$ ): - -\f[\begin{array}{l} -\vecthree{x}{y}{z} = R \vecthree{X}{Y}{Z} + t \\ -x' = x/z \\ -y' = y/z \\ -u = f_x*x' + c_x \\ -v = f_y*y' + c_y -\end{array}\f] - -Real lenses usually have some distortion, mostly radial distortion and slight tangential distortion. +with + +\f[\vecthree{X_c}{Y_c}{Z_c} = \begin{bmatrix} +R|t +\end{bmatrix} \begin{bmatrix} +X_w \\ +Y_w \\ +Z_w \\ +1 +\end{bmatrix}.\f] + +The following figure illustrates the pinhole camera model. + +![Pinhole camera model](pics/pinhole_camera_model.png) + +Real lenses usually have some distortion, mostly radial distortion, and slight tangential distortion. So, the above model is extended as: -\f[\begin{array}{l} -\vecthree{x}{y}{z} = R \vecthree{X}{Y}{Z} + t \\ -x' = x/z \\ -y' = y/z \\ -x'' = x' \frac{1 + k_1 r^2 + k_2 r^4 + k_3 r^6}{1 + k_4 r^2 + k_5 r^4 + k_6 r^6} + 2 p_1 x' y' + p_2(r^2 + 2 x'^2) + s_1 r^2 + s_2 r^4 \\ -y'' = y' \frac{1 + k_1 r^2 + k_2 r^4 + k_3 r^6}{1 + k_4 r^2 + k_5 r^4 + k_6 r^6} + p_1 (r^2 + 2 y'^2) + 2 p_2 x' y' + s_3 r^2 + s_4 r^4 \\ -\text{where} \quad r^2 = x'^2 + y'^2 \\ -u = f_x*x'' + c_x \\ -v = f_y*y'' + c_y -\end{array}\f] - -\f$k_1\f$, \f$k_2\f$, \f$k_3\f$, \f$k_4\f$, \f$k_5\f$, and \f$k_6\f$ are radial distortion coefficients. \f$p_1\f$ and \f$p_2\f$ are -tangential distortion coefficients. \f$s_1\f$, \f$s_2\f$, \f$s_3\f$, and \f$s_4\f$, are the thin prism distortion -coefficients. Higher-order coefficients are not considered in OpenCV. - -In some cases the image sensor may be tilted in order to focus an oblique plane in front of the -camera (Scheimpfug condition). This can be useful for particle image velocimetry (PIV) or +\f[\begin{bmatrix} +u \\ +v +\end{bmatrix} = \begin{bmatrix} +f_x x'' + c_x \\ +f_y y'' + c_y +\end{bmatrix}\f] + +where + +\f[\begin{bmatrix} +x'' \\ +y'' +\end{bmatrix} = \begin{bmatrix} +x' \frac{1 + k_1 r^2 + k_2 r^4 + k_3 r^6}{1 + k_4 r^2 + k_5 r^4 + k_6 r^6} + 2 p_1 x' y' + p_2(r^2 + 2 x'^2) + s_1 r^2 + s_2 r^4 \\ +y' \frac{1 + k_1 r^2 + k_2 r^4 + k_3 r^6}{1 + k_4 r^2 + k_5 r^4 + k_6 r^6} + p_1 (r^2 + 2 y'^2) + 2 p_2 x' y' + s_3 r^2 + s_4 r^4 \\ +\end{bmatrix}\f] + +with + +\f[r^2 = x'^2 + y'^2\f] + +and + +\f[\begin{bmatrix} +x'\\ +y' +\end{bmatrix} = \begin{bmatrix} +X_c/Z_c \\ +Y_c/Z_c +\end{bmatrix},\f] + +if \f$Z_c \ne 0\f$. + +The distortion parameters are the radial coefficients \f$k_1\f$, \f$k_2\f$, \f$k_3\f$, \f$k_4\f$, \f$k_5\f$, and \f$k_6\f$ +,\f$p_1\f$ and \f$p_2\f$ are the tangential distortion coefficients, and \f$s_1\f$, \f$s_2\f$, \f$s_3\f$, and \f$s_4\f$, +are the thin prism distortion coefficients. Higher-order coefficients are not considered in OpenCV. + +The next figures show two common types of radial distortion: barrel distortion +(\f$ 1 + k_1 r^2 + k_2 r^4 + k_3 r^6 \f$ monotonically decreasing) +and pincushion distortion (\f$ 1 + k_1 r^2 + k_2 r^4 + k_3 r^6 \f$ monotonically increasing). +Radial distortion is always monotonic for real lenses, +and if the estimator produces a non-monotonic result, +this should be considered a calibration failure. +More generally, radial distortion must be monotonic and the distortion function must be bijective. +A failed estimation result may look deceptively good near the image center +but will work poorly in e.g. AR/SFM applications. +The optimization method used in OpenCV camera calibration does not include these constraints as +the framework does not support the required integer programming and polynomial inequalities. +See [issue #15992](https://github.com/opencv/opencv/issues/15992) for additional information. + +![](pics/distortion_examples.png) +![](pics/distortion_examples2.png) + +In some cases, the image sensor may be tilted in order to focus an oblique plane in front of the +camera (Scheimpflug principle). This can be useful for particle image velocimetry (PIV) or triangulation with a laser fan. The tilt causes a perspective distortion of \f$x''\f$ and -\f$y''\f$. This distortion can be modelled in the following way, see e.g. @cite Louhichi07. +\f$y''\f$. This distortion can be modeled in the following way, see e.g. @cite Louhichi07. -\f[\begin{array}{l} -s\vecthree{x'''}{y'''}{1} = +\f[\begin{bmatrix} +u \\ +v +\end{bmatrix} = \begin{bmatrix} +f_x x''' + c_x \\ +f_y y''' + c_y +\end{bmatrix},\f] + +where + +\f[s\vecthree{x'''}{y'''}{1} = \vecthreethree{R_{33}(\tau_x, \tau_y)}{0}{-R_{13}(\tau_x, \tau_y)} {0}{R_{33}(\tau_x, \tau_y)}{-R_{23}(\tau_x, \tau_y)} -{0}{0}{1} R(\tau_x, \tau_y) \vecthree{x''}{y''}{1}\\ -u = f_x*x''' + c_x \\ -v = f_y*y''' + c_y -\end{array}\f] +{0}{0}{1} R(\tau_x, \tau_y) \vecthree{x''}{y''}{1}\f] -where the matrix \f$R(\tau_x, \tau_y)\f$ is defined by two rotations with angular parameter \f$\tau_x\f$ -and \f$\tau_y\f$, respectively, +and the matrix \f$R(\tau_x, \tau_y)\f$ is defined by two rotations with angular parameter +\f$\tau_x\f$ and \f$\tau_y\f$, respectively, \f[ R(\tau_x, \tau_y) = @@ -148,8 +317,8 @@ vector. That is, if the vector contains four elements, it means that \f$k_3=0\f$ coefficients do not depend on the scene viewed. Thus, they also belong to the intrinsic camera parameters. And they remain the same regardless of the captured image resolution. If, for example, a camera has been calibrated on images of 320 x 240 resolution, absolutely the same distortion -coefficients can be used for 640 x 480 images from the same camera while \f$f_x\f$, \f$f_y\f$, \f$c_x\f$, and -\f$c_y\f$ need to be scaled appropriately. +coefficients can be used for 640 x 480 images from the same camera while \f$f_x\f$, \f$f_y\f$, +\f$c_x\f$, and \f$c_y\f$ need to be scaled appropriately. The functions below use the above model to do the following: @@ -161,15 +330,68 @@ pattern (every view is described by several 3D-2D point correspondences). - Estimate the relative position and orientation of the stereo camera "heads" and compute the *rectification* transformation that makes the camera optical axes parallel. + Homogeneous Coordinates
+Homogeneous Coordinates are a system of coordinates that are used in projective geometry. Their use +allows to represent points at infinity by finite coordinates and simplifies formulas when compared +to the cartesian counterparts, e.g. they have the advantage that affine transformations can be +expressed as linear homogeneous transformation. + +One obtains the homogeneous vector \f$P_h\f$ by appending a 1 along an n-dimensional cartesian +vector \f$P\f$ e.g. for a 3D cartesian vector the mapping \f$P \rightarrow P_h\f$ is: + +\f[\begin{bmatrix} +X \\ +Y \\ +Z +\end{bmatrix} \rightarrow \begin{bmatrix} +X \\ +Y \\ +Z \\ +1 +\end{bmatrix}.\f] + +For the inverse mapping \f$P_h \rightarrow P\f$, one divides all elements of the homogeneous vector +by its last element, e.g. for a 3D homogeneous vector one gets its 2D cartesian counterpart by: + +\f[\begin{bmatrix} +X \\ +Y \\ +W +\end{bmatrix} \rightarrow \begin{bmatrix} +X / W \\ +Y / W +\end{bmatrix},\f] + +if \f$W \ne 0\f$. + +Due to this mapping, all multiples \f$k P_h\f$, for \f$k \ne 0\f$, of a homogeneous point represent +the same point \f$P_h\f$. An intuitive understanding of this property is that under a projective +transformation, all multiples of \f$P_h\f$ are mapped to the same point. This is the physical +observation one does for pinhole cameras, as all points along a ray through the camera's pinhole are +projected to the same image point, e.g. all points along the red ray in the image of the pinhole +camera model above would be mapped to the same image coordinate. This property is also the source +for the scale ambiguity s in the equation of the pinhole camera model. + +As mentioned, by using homogeneous coordinates we can express any change of basis parameterized by +\f$R\f$ and \f$t\f$ as a linear transformation, e.g. for the change of basis from coordinate system +0 to coordinate system 1 becomes: + +\f[P_1 = R P_0 + t \rightarrow P_{h_1} = \begin{bmatrix} +R & t \\ +0 & 1 +\end{bmatrix} P_{h_0}.\f] + @note - - A calibration sample for 3 cameras in horizontal position can be found at + - Many functions in this module take a camera matrix as an input parameter. Although all + functions assume the same structure of this parameter, they may name it differently. The + parameter's description, however, will be clear in that a camera matrix with the structure + shown above is required. + - A calibration sample for 3 cameras in a horizontal position can be found at opencv_source_code/samples/cpp/3calibration.cpp - A calibration sample based on a sequence of images can be found at opencv_source_code/samples/cpp/calibration.cpp - A calibration sample in order to do 3D reconstruction can be found at opencv_source_code/samples/cpp/build3dmodel.cpp - - A calibration sample of an artificially generated camera and chessboard patterns can be - found at opencv_source_code/samples/cpp/calibration_artificial.cpp - A calibration example on stereo calibration can be found at opencv_source_code/samples/cpp/stereo_calib.cpp - A calibration example on stereo matching can be found at @@ -190,7 +412,7 @@ pattern (every view is described by several 3D-2D point correspondences). \f[x = Xc_1 \\ y = Xc_2 \\ z = Xc_3\f] - The pinehole projection coordinates of P is [a; b] where + The pinhole projection coordinates of P is [a; b] where \f[a = x / z \ and \ b = y / z \\ r^2 = a^2 + b^2 \\ \theta = atan(r)\f] @@ -219,23 +441,40 @@ namespace cv //! @{ //! type of the robust estimation algorithm -enum { LMEDS = 4, //!< least-median algorithm +enum { LMEDS = 4, //!< least-median of squares algorithm RANSAC = 8, //!< RANSAC algorithm RHO = 16 //!< RHO algorithm }; -enum { SOLVEPNP_ITERATIVE = 0, - SOLVEPNP_EPNP = 1, //!< EPnP: Efficient Perspective-n-Point Camera Pose Estimation @cite lepetit2009epnp - SOLVEPNP_P3P = 2, //!< Complete Solution Classification for the Perspective-Three-Point Problem @cite gao2003complete - SOLVEPNP_DLS = 3, //!< A Direct Least-Squares (DLS) Method for PnP @cite hesch2011direct - SOLVEPNP_UPNP = 4 //!< Exhaustive Linearization for Robust Camera Pose and Focal Length Estimation @cite penate2013exhaustive - +enum SolvePnPMethod { + SOLVEPNP_ITERATIVE = 0, + SOLVEPNP_EPNP = 1, //!< EPnP: Efficient Perspective-n-Point Camera Pose Estimation @cite lepetit2009epnp + SOLVEPNP_P3P = 2, //!< Complete Solution Classification for the Perspective-Three-Point Problem @cite gao2003complete + SOLVEPNP_DLS = 3, //!< A Direct Least-Squares (DLS) Method for PnP @cite hesch2011direct + SOLVEPNP_UPNP = 4, //!< Exhaustive Linearization for Robust Camera Pose and Focal Length Estimation @cite penate2013exhaustive + SOLVEPNP_AP3P = 5, //!< An Efficient Algebraic Solution to the Perspective-Three-Point Problem @cite Ke17 + SOLVEPNP_IPPE = 6, //!< Infinitesimal Plane-Based Pose Estimation @cite Collins14 \n + //!< Object points must be coplanar. + SOLVEPNP_IPPE_SQUARE = 7, //!< Infinitesimal Plane-Based Pose Estimation @cite Collins14 \n + //!< This is a special case suitable for marker pose estimation.\n + //!< 4 coplanar object points must be defined in the following order: + //!< - point 0: [-squareLength / 2, squareLength / 2, 0] + //!< - point 1: [ squareLength / 2, squareLength / 2, 0] + //!< - point 2: [ squareLength / 2, -squareLength / 2, 0] + //!< - point 3: [-squareLength / 2, -squareLength / 2, 0] +#ifndef CV_DOXYGEN + SOLVEPNP_MAX_COUNT //!< Used for count +#endif }; enum { CALIB_CB_ADAPTIVE_THRESH = 1, CALIB_CB_NORMALIZE_IMAGE = 2, CALIB_CB_FILTER_QUADS = 4, - CALIB_CB_FAST_CHECK = 8 + CALIB_CB_FAST_CHECK = 8, + CALIB_CB_EXHAUSTIVE = 16, + CALIB_CB_ACCURACY = 32, + CALIB_CB_LARGER = 64, + CALIB_CB_MARKER = 128 }; enum { CALIB_CB_SYMMETRIC_GRID = 1, @@ -243,7 +482,8 @@ enum { CALIB_CB_SYMMETRIC_GRID = 1, CALIB_CB_CLUSTERING = 4 }; -enum { CALIB_USE_INTRINSIC_GUESS = 0x00001, +enum { CALIB_NINTRINSIC = 18, + CALIB_USE_INTRINSIC_GUESS = 0x00001, CALIB_FIX_ASPECT_RATIO = 0x00002, CALIB_FIX_PRINCIPAL_POINT = 0x00004, CALIB_ZERO_TANGENT_DIST = 0x00008, @@ -259,21 +499,32 @@ enum { CALIB_USE_INTRINSIC_GUESS = 0x00001, CALIB_FIX_S1_S2_S3_S4 = 0x10000, CALIB_TILTED_MODEL = 0x40000, CALIB_FIX_TAUX_TAUY = 0x80000, + CALIB_USE_QR = 0x100000, //!< use QR instead of SVD decomposition for solving. Faster but potentially less precise + CALIB_FIX_TANGENT_DIST = 0x200000, // only for stereo CALIB_FIX_INTRINSIC = 0x00100, CALIB_SAME_FOCAL_LENGTH = 0x00200, // for stereo rectification CALIB_ZERO_DISPARITY = 0x00400, CALIB_USE_LU = (1 << 17), //!< use LU instead of SVD decomposition for solving. much faster but potentially less precise + CALIB_USE_EXTRINSIC_GUESS = (1 << 22) //!< for stereoCalibrate }; //! the algorithm for finding fundamental matrix enum { FM_7POINT = 1, //!< 7-point algorithm FM_8POINT = 2, //!< 8-point algorithm - FM_LMEDS = 4, //!< least-median algorithm - FM_RANSAC = 8 //!< RANSAC algorithm + FM_LMEDS = 4, //!< least-median algorithm. 7-point algorithm is used. + FM_RANSAC = 8 //!< RANSAC algorithm. It needs at least 15 points. 7-point algorithm is used. }; +enum HandEyeCalibrationMethod +{ + CALIB_HAND_EYE_TSAI = 0, //!< A New Technique for Fully Autonomous and Efficient 3D Robotics Hand/Eye Calibration @cite Tsai89 + CALIB_HAND_EYE_PARK = 1, //!< Robot Sensor Calibration: Solving AX = XB on the Euclidean Group @cite Park94 + CALIB_HAND_EYE_HORAUD = 2, //!< Hand-eye Calibration @cite Horaud95 + CALIB_HAND_EYE_ANDREFF = 3, //!< On-line Hand-Eye Calibration @cite Andreff99 + CALIB_HAND_EYE_DANIILIDIS = 4 //!< Hand-Eye Calibration Using Dual Quaternions @cite Daniilidis98 +}; /** @brief Converts a rotation matrix to a rotation vector or vice versa. @@ -283,7 +534,7 @@ enum { FM_7POINT = 1, //!< 7-point algorithm @param jacobian Optional output Jacobian matrix, 3x9 or 9x3, which is a matrix of partial derivatives of the output array components with respect to the input array components. -\f[\begin{array}{l} \theta \leftarrow norm(r) \\ r \leftarrow r/ \theta \\ R = \cos{\theta} I + (1- \cos{\theta} ) r r^T + \sin{\theta} \vecthreethree{0}{-r_z}{r_y}{r_z}{0}{-r_x}{-r_y}{r_x}{0} \end{array}\f] +\f[\begin{array}{l} \theta \leftarrow norm(r) \\ r \leftarrow r/ \theta \\ R = \cos(\theta) I + (1- \cos{\theta} ) r r^T + \sin(\theta) \vecthreethree{0}{-r_z}{r_y}{r_z}{0}{-r_x}{-r_y}{r_x}{0} \end{array}\f] Inverse transformation can be also done easily, since @@ -291,32 +542,114 @@ Inverse transformation can be also done easily, since A rotation vector is a convenient and most compact representation of a rotation matrix (since any rotation matrix has just 3 degrees of freedom). The representation is used in the global 3D geometry -optimization procedures like calibrateCamera, stereoCalibrate, or solvePnP . +optimization procedures like @ref calibrateCamera, @ref stereoCalibrate, or @ref solvePnP . + +@note More information about the computation of the derivative of a 3D rotation matrix with respect to its exponential coordinate +can be found in: + - A Compact Formula for the Derivative of a 3-D Rotation in Exponential Coordinates, Guillermo Gallego, Anthony J. Yezzi @cite Gallego2014ACF + +@note Useful information on SE(3) and Lie Groups can be found in: + - A tutorial on SE(3) transformation parameterizations and on-manifold optimization, Jose-Luis Blanco @cite blanco2010tutorial + - Lie Groups for 2D and 3D Transformation, Ethan Eade @cite Eade17 + - A micro Lie theory for state estimation in robotics, Joan Solà, Jérémie Deray, Dinesh Atchuthan @cite Sol2018AML */ CV_EXPORTS_W void Rodrigues( InputArray src, OutputArray dst, OutputArray jacobian = noArray() ); + + +/** Levenberg-Marquardt solver. Starting with the specified vector of parameters it + optimizes the target vector criteria "err" + (finds local minima of each target vector component absolute value). + + When needed, it calls user-provided callback. +*/ +class CV_EXPORTS LMSolver : public Algorithm +{ +public: + class CV_EXPORTS Callback + { + public: + virtual ~Callback() {} + /** + computes error and Jacobian for the specified vector of parameters + + @param param the current vector of parameters + @param err output vector of errors: err_i = actual_f_i - ideal_f_i + @param J output Jacobian: J_ij = d(err_i)/d(param_j) + + when J=noArray(), it means that it does not need to be computed. + Dimensionality of error vector and param vector can be different. + The callback should explicitly allocate (with "create" method) each output array + (unless it's noArray()). + */ + virtual bool compute(InputArray param, OutputArray err, OutputArray J) const = 0; + }; + + /** + Runs Levenberg-Marquardt algorithm using the passed vector of parameters as the start point. + The final vector of parameters (whether the algorithm converged or not) is stored at the same + vector. The method returns the number of iterations used. If it's equal to the previously specified + maxIters, there is a big chance the algorithm did not converge. + + @param param initial/final vector of parameters. + + Note that the dimensionality of parameter space is defined by the size of param vector, + and the dimensionality of optimized criteria is defined by the size of err vector + computed by the callback. + */ + virtual int run(InputOutputArray param) const = 0; + + /** + Sets the maximum number of iterations + @param maxIters the number of iterations + */ + virtual void setMaxIters(int maxIters) = 0; + /** + Retrieves the current maximum number of iterations + */ + virtual int getMaxIters() const = 0; + + /** + Creates Levenberg-Marquard solver + + @param cb callback + @param maxIters maximum number of iterations that can be further + modified using setMaxIters() method. + */ + static Ptr create(const Ptr& cb, int maxIters); + static Ptr create(const Ptr& cb, int maxIters, double eps); +}; + + + +/** @example samples/cpp/tutorial_code/features2D/Homography/pose_from_homography.cpp +An example program about pose estimation from coplanar points + +Check @ref tutorial_homography "the corresponding tutorial" for more details +*/ + /** @brief Finds a perspective transformation between two planes. @param srcPoints Coordinates of the points in the original plane, a matrix of the type CV_32FC2 or vector\ . @param dstPoints Coordinates of the points in the target plane, a matrix of the type CV_32FC2 or a vector\ . -@param method Method used to computed a homography matrix. The following methods are possible: -- **0** - a regular method using all the points +@param method Method used to compute a homography matrix. The following methods are possible: +- **0** - a regular method using all the points, i.e., the least squares method - **RANSAC** - RANSAC-based robust method - **LMEDS** - Least-Median robust method -- **RHO** - PROSAC-based robust method +- **RHO** - PROSAC-based robust method @param ransacReprojThreshold Maximum allowed reprojection error to treat a point pair as an inlier (used in the RANSAC and RHO methods only). That is, if -\f[\| \texttt{dstPoints} _i - \texttt{convertPointsHomogeneous} ( \texttt{H} * \texttt{srcPoints} _i) \| > \texttt{ransacReprojThreshold}\f] -then the point \f$i\f$ is considered an outlier. If srcPoints and dstPoints are measured in pixels, +\f[\| \texttt{dstPoints} _i - \texttt{convertPointsHomogeneous} ( \texttt{H} * \texttt{srcPoints} _i) \|_2 > \texttt{ransacReprojThreshold}\f] +then the point \f$i\f$ is considered as an outlier. If srcPoints and dstPoints are measured in pixels, it usually makes sense to set this parameter somewhere in the range of 1 to 10. @param mask Optional output mask set by a robust method ( RANSAC or LMEDS ). Note that the input mask values are ignored. -@param maxIters The maximum number of RANSAC iterations, 2000 is the maximum it can be. +@param maxIters The maximum number of RANSAC iterations. @param confidence Confidence level, between 0 and 1. -The functions find and return the perspective transformation \f$H\f$ between the source and the +The function finds and returns the perspective transformation \f$H\f$ between the source and the destination planes: \f[s_i \vecthree{x'_i}{y'_i}{1} \sim H \vecthree{x_i}{y_i}{1}\f] @@ -331,10 +664,10 @@ pairs to compute an initial homography estimate with a simple least-squares sche However, if not all of the point pairs ( \f$srcPoints_i\f$, \f$dstPoints_i\f$ ) fit the rigid perspective transformation (that is, there are some outliers), this initial estimate will be poor. In this case, you can use one of the three robust methods. The methods RANSAC, LMeDS and RHO try many different -random subsets of the corresponding point pairs (of four pairs each), estimate the homography matrix -using this subset and a simple least-square algorithm, and then compute the quality/goodness of the -computed homography (which is the number of inliers for RANSAC or the median re-projection error for -LMeDs). The best subset is then used to produce the initial estimate of the homography matrix and +random subsets of the corresponding point pairs (of four pairs each, collinear pairs are discarded), estimate the homography matrix +using this subset and a simple least-squares algorithm, and then compute the quality/goodness of the +computed homography (which is the number of inliers for RANSAC or the least median re-projection error for +LMeDS). The best subset is then used to produce the initial estimate of the homography matrix and the mask of inliers/outliers. Regardless of the method, robust or not, the computed homography matrix is refined further (using @@ -347,17 +680,12 @@ correctly only when there are more than 50% of inliers. Finally, if there are no noise is rather small, use the default method (method=0). The function is used to find initial intrinsic and extrinsic matrices. Homography matrix is -determined up to a scale. Thus, it is normalized so that \f$h_{33}=1\f$. Note that whenever an H matrix +determined up to a scale. Thus, it is normalized so that \f$h_{33}=1\f$. Note that whenever an \f$H\f$ matrix cannot be estimated, an empty one will be returned. @sa - getAffineTransform, getPerspectiveTransform, estimateRigidTransform, warpPerspective, - perspectiveTransform - -@note - - A example on calculating a homography for image matching can be found at - opencv_source_code/samples/cpp/video_homography.cpp - +getAffineTransform, estimateAffine2D, estimateAffinePartial2D, getPerspectiveTransform, warpPerspective, +perspectiveTransform */ CV_EXPORTS_W Mat findHomography( InputArray srcPoints, InputArray dstPoints, int method = 0, double ransacReprojThreshold = 3, @@ -383,8 +711,8 @@ and a rotation matrix. It optionally returns three rotation matrices, one for each axis, and the three Euler angles in degrees (as the return value) that could be used in OpenGL. Note, there is always more than one -sequence of rotations about the three principle axes that results in the same orientation of an -object, eg. see @cite Slabaugh . Returned tree rotation matrices and corresponding three Euler angules +sequence of rotations about the three principal axes that results in the same orientation of an +object, e.g. see @cite Slabaugh . Returned tree rotation matrices and corresponding three Euler angles are only one of the possible solutions. */ CV_EXPORTS_W Vec3d RQDecomp3x3( InputArray src, OutputArray mtxR, OutputArray mtxQ, @@ -409,8 +737,8 @@ matrix and the position of a camera. It optionally returns three rotation matrices, one for each axis, and three Euler angles that could be used in OpenGL. Note, there is always more than one sequence of rotations about the three -principle axes that results in the same orientation of an object, eg. see @cite Slabaugh . Returned -tree rotation matrices and corresponding three Euler angules are only one of the possible solutions. +principal axes that results in the same orientation of an object, e.g. see @cite Slabaugh . Returned +tree rotation matrices and corresponding three Euler angles are only one of the possible solutions. The function is based on RQDecomp3x3 . */ @@ -444,15 +772,14 @@ CV_EXPORTS_W void matMulDeriv( InputArray A, InputArray B, OutputArray dABdA, Ou @param tvec2 Second translation vector. @param rvec3 Output rotation vector of the superposition. @param tvec3 Output translation vector of the superposition. -@param dr3dr1 -@param dr3dt1 -@param dr3dr2 -@param dr3dt2 -@param dt3dr1 -@param dt3dt1 -@param dt3dr2 -@param dt3dt2 Optional output derivatives of rvec3 or tvec3 with regard to rvec1, rvec2, tvec1 and -tvec2, respectively. +@param dr3dr1 Optional output derivative of rvec3 with regard to rvec1 +@param dr3dt1 Optional output derivative of rvec3 with regard to tvec1 +@param dr3dr2 Optional output derivative of rvec3 with regard to rvec2 +@param dr3dt2 Optional output derivative of rvec3 with regard to tvec2 +@param dt3dr1 Optional output derivative of tvec3 with regard to rvec1 +@param dt3dt1 Optional output derivative of tvec3 with regard to tvec1 +@param dt3dr2 Optional output derivative of tvec3 with regard to rvec2 +@param dt3dt2 Optional output derivative of tvec3 with regard to tvec2 The functions compute: @@ -476,35 +803,37 @@ CV_EXPORTS_W void composeRT( InputArray rvec1, InputArray tvec1, /** @brief Projects 3D points to an image plane. -@param objectPoints Array of object points, 3xN/Nx3 1-channel or 1xN/Nx1 3-channel (or -vector\ ), where N is the number of points in the view. -@param rvec Rotation vector. See Rodrigues for details. -@param tvec Translation vector. +@param objectPoints Array of object points expressed wrt. the world coordinate frame. A 3xN/Nx3 +1-channel or 1xN/Nx1 3-channel (or vector\ ), where N is the number of points in the view. +@param rvec The rotation vector (@ref Rodrigues) that, together with tvec, performs a change of +basis from world to camera coordinate system, see @ref calibrateCamera for details. +@param tvec The translation vector, see parameter description above. @param cameraMatrix Camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$ . @param distCoeffs Input vector of distortion coefficients \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of 4, 5, 8, 12 or 14 elements. If the vector is empty, the zero distortion coefficients are assumed. -@param imagePoints Output array of image points, 2xN/Nx2 1-channel or 1xN/Nx1 2-channel, or +@param imagePoints Output array of image points, 1xN/Nx1 2-channel, or vector\ . @param jacobian Optional output 2Nx(10+\) jacobian matrix of derivatives of image points with respect to components of the rotation vector, translation vector, focal lengths, coordinates of the principal point and the distortion coefficients. In the old interface different components of the jacobian are returned via different output parameters. @param aspectRatio Optional "fixed aspect ratio" parameter. If the parameter is not 0, the -function assumes that the aspect ratio (*fx/fy*) is fixed and correspondingly adjusts the jacobian -matrix. - -The function computes projections of 3D points to the image plane given intrinsic and extrinsic -camera parameters. Optionally, the function computes Jacobians - matrices of partial derivatives of -image points coordinates (as functions of all the input parameters) with respect to the particular -parameters, intrinsic and/or extrinsic. The Jacobians are used during the global optimization in -calibrateCamera, solvePnP, and stereoCalibrate . The function itself can also be used to compute a -re-projection error given the current intrinsic and extrinsic parameters. - -@note By setting rvec=tvec=(0,0,0) or by setting cameraMatrix to a 3x3 identity matrix, or by -passing zero distortion coefficients, you can get various useful partial cases of the function. This -means that you can compute the distorted coordinates for a sparse set of points or apply a -perspective transformation (and also compute the derivatives) in the ideal zero-distortion setup. +function assumes that the aspect ratio (\f$f_x / f_y\f$) is fixed and correspondingly adjusts the +jacobian matrix. + +The function computes the 2D projections of 3D points to the image plane, given intrinsic and +extrinsic camera parameters. Optionally, the function computes Jacobians -matrices of partial +derivatives of image points coordinates (as functions of all the input parameters) with respect to +the particular parameters, intrinsic and/or extrinsic. The Jacobians are used during the global +optimization in @ref calibrateCamera, @ref solvePnP, and @ref stereoCalibrate. The function itself +can also be used to compute a re-projection error, given the current intrinsic and extrinsic +parameters. + +@note By setting rvec = tvec = \f$[0, 0, 0]\f$, or by setting cameraMatrix to a 3x3 identity matrix, +or by passing zero distortion coefficients, one can get various useful partial cases of the +function. This means, one can compute the distorted coordinates for a sparse set of points or apply +a perspective transformation (and also compute the derivatives) in the ideal zero-distortion setup. */ CV_EXPORTS_W void projectPoints( InputArray objectPoints, InputArray rvec, InputArray tvec, @@ -513,43 +842,162 @@ CV_EXPORTS_W void projectPoints( InputArray objectPoints, OutputArray jacobian = noArray(), double aspectRatio = 0 ); +/** @example samples/cpp/tutorial_code/features2D/Homography/homography_from_camera_displacement.cpp +An example program about homography from the camera displacement + +Check @ref tutorial_homography "the corresponding tutorial" for more details +*/ + /** @brief Finds an object pose from 3D-2D point correspondences. +This function returns the rotation and the translation vectors that transform a 3D point expressed in the object +coordinate frame to the camera coordinate frame, using different methods: +- P3P methods (@ref SOLVEPNP_P3P, @ref SOLVEPNP_AP3P): need 4 input points to return a unique solution. +- @ref SOLVEPNP_IPPE Input points must be >= 4 and object points must be coplanar. +- @ref SOLVEPNP_IPPE_SQUARE Special case suitable for marker pose estimation. +Number of input points must be 4. Object points must be defined in the following order: + - point 0: [-squareLength / 2, squareLength / 2, 0] + - point 1: [ squareLength / 2, squareLength / 2, 0] + - point 2: [ squareLength / 2, -squareLength / 2, 0] + - point 3: [-squareLength / 2, -squareLength / 2, 0] +- for all the other flags, number of input points must be >= 4 and object points can be in any configuration. @param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or -1xN/Nx1 3-channel, where N is the number of points. vector\ can be also passed here. +1xN/Nx1 3-channel, where N is the number of points. vector\ can be also passed here. @param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel, -where N is the number of points. vector\ can be also passed here. -@param cameraMatrix Input camera matrix \f$A = \vecthreethree{fx}{0}{cx}{0}{fy}{cy}{0}{0}{1}\f$ . +where N is the number of points. vector\ can be also passed here. +@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . @param distCoeffs Input vector of distortion coefficients \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are assumed. -@param rvec Output rotation vector (see Rodrigues ) that, together with tvec , brings points from +@param rvec Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from the model coordinate system to the camera coordinate system. @param tvec Output translation vector. -@param useExtrinsicGuess Parameter used for SOLVEPNP_ITERATIVE. If true (1), the function uses +@param useExtrinsicGuess Parameter used for #SOLVEPNP_ITERATIVE. If true (1), the function uses the provided rvec and tvec values as initial approximations of the rotation and translation vectors, respectively, and further optimizes them. @param flags Method for solving a PnP problem: -- **SOLVEPNP_ITERATIVE** Iterative method is based on Levenberg-Marquardt optimization. In +- **SOLVEPNP_ITERATIVE** Iterative method is based on a Levenberg-Marquardt optimization. In this case the function finds such a pose that minimizes reprojection error, that is the sum of squared distances between the observed projections imagePoints and the projected (using projectPoints ) objectPoints . - **SOLVEPNP_P3P** Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang -"Complete Solution Classification for the Perspective-Three-Point Problem". In this case the -function requires exactly four object and image points. -- **SOLVEPNP_EPNP** Method has been introduced by F.Moreno-Noguer, V.Lepetit and P.Fua in the -paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation". -- **SOLVEPNP_DLS** Method is based on the paper of Joel A. Hesch and Stergios I. Roumeliotis. -"A Direct Least-Squares (DLS) Method for PnP". -- **SOLVEPNP_UPNP** Method is based on the paper of A.Penate-Sanchez, J.Andrade-Cetto, -F.Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length -Estimation". In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$ +"Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete). +In this case the function requires exactly four object and image points. +- **SOLVEPNP_AP3P** Method is based on the paper of T. Ke, S. Roumeliotis +"An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17). +In this case the function requires exactly four object and image points. +- **SOLVEPNP_EPNP** Method has been introduced by F. Moreno-Noguer, V. Lepetit and P. Fua in the +paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp). +- **SOLVEPNP_DLS** Method is based on the paper of J. Hesch and S. Roumeliotis. +"A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct). +- **SOLVEPNP_UPNP** Method is based on the paper of A. Penate-Sanchez, J. Andrade-Cetto, +F. Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length +Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$ assuming that both have the same value. Then the cameraMatrix is updated with the estimated focal length. +- **SOLVEPNP_IPPE** Method is based on the paper of T. Collins and A. Bartoli. +"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method requires coplanar object points. +- **SOLVEPNP_IPPE_SQUARE** Method is based on the paper of Toby Collins and Adrien Bartoli. +"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method is suitable for marker pose estimation. +It requires 4 coplanar object points defined in the following order: + - point 0: [-squareLength / 2, squareLength / 2, 0] + - point 1: [ squareLength / 2, squareLength / 2, 0] + - point 2: [ squareLength / 2, -squareLength / 2, 0] + - point 3: [-squareLength / 2, -squareLength / 2, 0] The function estimates the object pose given a set of object points, their corresponding image -projections, as well as the camera matrix and the distortion coefficients. +projections, as well as the camera matrix and the distortion coefficients, see the figure below +(more precisely, the X-axis of the camera frame is pointing to the right, the Y-axis downward +and the Z-axis forward). + +![](pnp.jpg) + +Points expressed in the world frame \f$ \bf{X}_w \f$ are projected into the image plane \f$ \left[ u, v \right] \f$ +using the perspective projection model \f$ \Pi \f$ and the camera intrinsic parameters matrix \f$ \bf{A} \f$: + +\f[ + \begin{align*} + \begin{bmatrix} + u \\ + v \\ + 1 + \end{bmatrix} &= + \bf{A} \hspace{0.1em} \Pi \hspace{0.2em} ^{c}\bf{T}_w + \begin{bmatrix} + X_{w} \\ + Y_{w} \\ + Z_{w} \\ + 1 + \end{bmatrix} \\ + \begin{bmatrix} + u \\ + v \\ + 1 + \end{bmatrix} &= + \begin{bmatrix} + f_x & 0 & c_x \\ + 0 & f_y & c_y \\ + 0 & 0 & 1 + \end{bmatrix} + \begin{bmatrix} + 1 & 0 & 0 & 0 \\ + 0 & 1 & 0 & 0 \\ + 0 & 0 & 1 & 0 + \end{bmatrix} + \begin{bmatrix} + r_{11} & r_{12} & r_{13} & t_x \\ + r_{21} & r_{22} & r_{23} & t_y \\ + r_{31} & r_{32} & r_{33} & t_z \\ + 0 & 0 & 0 & 1 + \end{bmatrix} + \begin{bmatrix} + X_{w} \\ + Y_{w} \\ + Z_{w} \\ + 1 + \end{bmatrix} + \end{align*} +\f] + +The estimated pose is thus the rotation (`rvec`) and the translation (`tvec`) vectors that allow transforming +a 3D point expressed in the world frame into the camera frame: + +\f[ + \begin{align*} + \begin{bmatrix} + X_c \\ + Y_c \\ + Z_c \\ + 1 + \end{bmatrix} &= + \hspace{0.2em} ^{c}\bf{T}_w + \begin{bmatrix} + X_{w} \\ + Y_{w} \\ + Z_{w} \\ + 1 + \end{bmatrix} \\ + \begin{bmatrix} + X_c \\ + Y_c \\ + Z_c \\ + 1 + \end{bmatrix} &= + \begin{bmatrix} + r_{11} & r_{12} & r_{13} & t_x \\ + r_{21} & r_{22} & r_{23} & t_y \\ + r_{31} & r_{32} & r_{33} & t_z \\ + 0 & 0 & 0 & 1 + \end{bmatrix} + \begin{bmatrix} + X_{w} \\ + Y_{w} \\ + Z_{w} \\ + 1 + \end{bmatrix} + \end{align*} +\f] @note - An example of how to use solvePnP for planar augmented reality can be found at @@ -564,6 +1012,22 @@ projections, as well as the camera matrix and the distortion coefficients. - Thus, given some data D = np.array(...) where D.shape = (N,M), in order to use a subset of it as, e.g., imagePoints, one must effectively copy it into a new array: imagePoints = np.ascontiguousarray(D[:,:2]).reshape((N,1,2)) + - The methods **SOLVEPNP_DLS** and **SOLVEPNP_UPNP** cannot be used as the current implementations are + unstable and sometimes give completely wrong results. If you pass one of these two + flags, **SOLVEPNP_EPNP** method will be used instead. + - The minimum number of points is 4 in the general case. In the case of **SOLVEPNP_P3P** and **SOLVEPNP_AP3P** + methods, it is required to use exactly 4 points (the first 3 points are used to estimate all the solutions + of the P3P problem, the last one is used to retain the best solution that minimizes the reprojection error). + - With **SOLVEPNP_ITERATIVE** method and `useExtrinsicGuess=true`, the minimum number of points is 3 (3 points + are sufficient to compute a pose but there are up to 4 solutions). The initial solution should be close to the + global solution to converge. + - With **SOLVEPNP_IPPE** input points must be >= 4 and object points must be coplanar. + - With **SOLVEPNP_IPPE_SQUARE** this is a special case suitable for marker pose estimation. + Number of input points must be 4. Object points must be defined in the following order: + - point 0: [-squareLength / 2, squareLength / 2, 0] + - point 1: [ squareLength / 2, squareLength / 2, 0] + - point 2: [ squareLength / 2, -squareLength / 2, 0] + - point 3: [-squareLength / 2, -squareLength / 2, 0] */ CV_EXPORTS_W bool solvePnP( InputArray objectPoints, InputArray imagePoints, InputArray cameraMatrix, InputArray distCoeffs, @@ -573,18 +1037,18 @@ CV_EXPORTS_W bool solvePnP( InputArray objectPoints, InputArray imagePoints, /** @brief Finds an object pose from 3D-2D point correspondences using the RANSAC scheme. @param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or -1xN/Nx1 3-channel, where N is the number of points. vector\ can be also passed here. +1xN/Nx1 3-channel, where N is the number of points. vector\ can be also passed here. @param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel, -where N is the number of points. vector\ can be also passed here. +where N is the number of points. vector\ can be also passed here. @param cameraMatrix Input camera matrix \f$A = \vecthreethree{fx}{0}{cx}{0}{fy}{cy}{0}{0}{1}\f$ . @param distCoeffs Input vector of distortion coefficients \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are assumed. -@param rvec Output rotation vector (see Rodrigues ) that, together with tvec , brings points from +@param rvec Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from the model coordinate system to the camera coordinate system. @param tvec Output translation vector. -@param useExtrinsicGuess Parameter used for SOLVEPNP_ITERATIVE. If true (1), the function uses +@param useExtrinsicGuess Parameter used for @ref SOLVEPNP_ITERATIVE. If true (1), the function uses the provided rvec and tvec values as initial approximations of the rotation and translation vectors, respectively, and further optimizes them. @param iterationsCount Number of iterations. @@ -593,17 +1057,24 @@ is the maximum allowed distance between the observed and computed point projecti an inlier. @param confidence The probability that the algorithm produces a useful result. @param inliers Output vector that contains indices of inliers in objectPoints and imagePoints . -@param flags Method for solving a PnP problem (see solvePnP ). +@param flags Method for solving a PnP problem (see @ref solvePnP ). The function estimates an object pose given a set of object points, their corresponding image projections, as well as the camera matrix and the distortion coefficients. This function finds such a pose that minimizes reprojection error, that is, the sum of squared distances between the observed -projections imagePoints and the projected (using projectPoints ) objectPoints. The use of RANSAC +projections imagePoints and the projected (using @ref projectPoints ) objectPoints. The use of RANSAC makes the function resistant to outliers. @note - An example of how to use solvePNPRansac for object detection can be found at opencv_source_code/samples/cpp/tutorial_code/calib3d/real_time_pose_estimation/ + - The default method used to estimate the camera pose for the Minimal Sample Sets step + is #SOLVEPNP_EPNP. Exceptions are: + - if you choose #SOLVEPNP_P3P or #SOLVEPNP_AP3P, these methods will be used. + - if the number of input points is equal to 4, #SOLVEPNP_P3P is used. + - The method used to estimate the camera pose using all the inliers is defined by the + flags parameters unless it is equal to #SOLVEPNP_P3P or #SOLVEPNP_AP3P. In this case, + the method #SOLVEPNP_EPNP will be used instead. */ CV_EXPORTS_W bool solvePnPRansac( InputArray objectPoints, InputArray imagePoints, InputArray cameraMatrix, InputArray distCoeffs, @@ -612,6 +1083,292 @@ CV_EXPORTS_W bool solvePnPRansac( InputArray objectPoints, InputArray imagePoint float reprojectionError = 8.0, double confidence = 0.99, OutputArray inliers = noArray(), int flags = SOLVEPNP_ITERATIVE ); +/** @brief Finds an object pose from 3 3D-2D point correspondences. + +@param objectPoints Array of object points in the object coordinate space, 3x3 1-channel or +1x3/3x1 3-channel. vector\ can be also passed here. +@param imagePoints Array of corresponding image points, 3x2 1-channel or 1x3/3x1 2-channel. + vector\ can be also passed here. +@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . +@param distCoeffs Input vector of distortion coefficients +\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of +4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are +assumed. +@param rvecs Output rotation vectors (see @ref Rodrigues ) that, together with tvecs, brings points from +the model coordinate system to the camera coordinate system. A P3P problem has up to 4 solutions. +@param tvecs Output translation vectors. +@param flags Method for solving a P3P problem: +- **SOLVEPNP_P3P** Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang +"Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete). +- **SOLVEPNP_AP3P** Method is based on the paper of T. Ke and S. Roumeliotis. +"An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17). + +The function estimates the object pose given 3 object points, their corresponding image +projections, as well as the camera matrix and the distortion coefficients. + +@note +The solutions are sorted by reprojection errors (lowest to highest). + */ +CV_EXPORTS_W int solveP3P( InputArray objectPoints, InputArray imagePoints, + InputArray cameraMatrix, InputArray distCoeffs, + OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs, + int flags ); + +/** @brief Refine a pose (the translation and the rotation that transform a 3D point expressed in the object coordinate frame +to the camera coordinate frame) from a 3D-2D point correspondences and starting from an initial solution. + +@param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or 1xN/Nx1 3-channel, +where N is the number of points. vector\ can also be passed here. +@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel, +where N is the number of points. vector\ can also be passed here. +@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . +@param distCoeffs Input vector of distortion coefficients +\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of +4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are +assumed. +@param rvec Input/Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from +the model coordinate system to the camera coordinate system. Input values are used as an initial solution. +@param tvec Input/Output translation vector. Input values are used as an initial solution. +@param criteria Criteria when to stop the Levenberg-Marquard iterative algorithm. + +The function refines the object pose given at least 3 object points, their corresponding image +projections, an initial solution for the rotation and translation vector, +as well as the camera matrix and the distortion coefficients. +The function minimizes the projection error with respect to the rotation and the translation vectors, according +to a Levenberg-Marquardt iterative minimization @cite Madsen04 @cite Eade13 process. + */ +CV_EXPORTS_W void solvePnPRefineLM( InputArray objectPoints, InputArray imagePoints, + InputArray cameraMatrix, InputArray distCoeffs, + InputOutputArray rvec, InputOutputArray tvec, + TermCriteria criteria = TermCriteria(TermCriteria::EPS + TermCriteria::COUNT, 20, FLT_EPSILON)); + +/** @brief Refine a pose (the translation and the rotation that transform a 3D point expressed in the object coordinate frame +to the camera coordinate frame) from a 3D-2D point correspondences and starting from an initial solution. + +@param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or 1xN/Nx1 3-channel, +where N is the number of points. vector\ can also be passed here. +@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel, +where N is the number of points. vector\ can also be passed here. +@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . +@param distCoeffs Input vector of distortion coefficients +\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of +4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are +assumed. +@param rvec Input/Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from +the model coordinate system to the camera coordinate system. Input values are used as an initial solution. +@param tvec Input/Output translation vector. Input values are used as an initial solution. +@param criteria Criteria when to stop the Levenberg-Marquard iterative algorithm. +@param VVSlambda Gain for the virtual visual servoing control law, equivalent to the \f$\alpha\f$ +gain in the Damped Gauss-Newton formulation. + +The function refines the object pose given at least 3 object points, their corresponding image +projections, an initial solution for the rotation and translation vector, +as well as the camera matrix and the distortion coefficients. +The function minimizes the projection error with respect to the rotation and the translation vectors, using a +virtual visual servoing (VVS) @cite Chaumette06 @cite Marchand16 scheme. + */ +CV_EXPORTS_W void solvePnPRefineVVS( InputArray objectPoints, InputArray imagePoints, + InputArray cameraMatrix, InputArray distCoeffs, + InputOutputArray rvec, InputOutputArray tvec, + TermCriteria criteria = TermCriteria(TermCriteria::EPS + TermCriteria::COUNT, 20, FLT_EPSILON), + double VVSlambda = 1); + +/** @brief Finds an object pose from 3D-2D point correspondences. +This function returns a list of all the possible solutions (a solution is a +couple), depending on the number of input points and the chosen method: +- P3P methods (@ref SOLVEPNP_P3P, @ref SOLVEPNP_AP3P): 3 or 4 input points. Number of returned solutions can be between 0 and 4 with 3 input points. +- @ref SOLVEPNP_IPPE Input points must be >= 4 and object points must be coplanar. Returns 2 solutions. +- @ref SOLVEPNP_IPPE_SQUARE Special case suitable for marker pose estimation. +Number of input points must be 4 and 2 solutions are returned. Object points must be defined in the following order: + - point 0: [-squareLength / 2, squareLength / 2, 0] + - point 1: [ squareLength / 2, squareLength / 2, 0] + - point 2: [ squareLength / 2, -squareLength / 2, 0] + - point 3: [-squareLength / 2, -squareLength / 2, 0] +- for all the other flags, number of input points must be >= 4 and object points can be in any configuration. +Only 1 solution is returned. + +@param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or +1xN/Nx1 3-channel, where N is the number of points. vector\ can be also passed here. +@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel, +where N is the number of points. vector\ can be also passed here. +@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . +@param distCoeffs Input vector of distortion coefficients +\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of +4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are +assumed. +@param rvecs Vector of output rotation vectors (see @ref Rodrigues ) that, together with tvecs, brings points from +the model coordinate system to the camera coordinate system. +@param tvecs Vector of output translation vectors. +@param useExtrinsicGuess Parameter used for #SOLVEPNP_ITERATIVE. If true (1), the function uses +the provided rvec and tvec values as initial approximations of the rotation and translation +vectors, respectively, and further optimizes them. +@param flags Method for solving a PnP problem: +- **SOLVEPNP_ITERATIVE** Iterative method is based on a Levenberg-Marquardt optimization. In +this case the function finds such a pose that minimizes reprojection error, that is the sum +of squared distances between the observed projections imagePoints and the projected (using +projectPoints ) objectPoints . +- **SOLVEPNP_P3P** Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang +"Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete). +In this case the function requires exactly four object and image points. +- **SOLVEPNP_AP3P** Method is based on the paper of T. Ke, S. Roumeliotis +"An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17). +In this case the function requires exactly four object and image points. +- **SOLVEPNP_EPNP** Method has been introduced by F.Moreno-Noguer, V.Lepetit and P.Fua in the +paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp). +- **SOLVEPNP_DLS** Method is based on the paper of Joel A. Hesch and Stergios I. Roumeliotis. +"A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct). +- **SOLVEPNP_UPNP** Method is based on the paper of A.Penate-Sanchez, J.Andrade-Cetto, +F.Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length +Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$ +assuming that both have the same value. Then the cameraMatrix is updated with the estimated +focal length. +- **SOLVEPNP_IPPE** Method is based on the paper of T. Collins and A. Bartoli. +"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method requires coplanar object points. +- **SOLVEPNP_IPPE_SQUARE** Method is based on the paper of Toby Collins and Adrien Bartoli. +"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method is suitable for marker pose estimation. +It requires 4 coplanar object points defined in the following order: + - point 0: [-squareLength / 2, squareLength / 2, 0] + - point 1: [ squareLength / 2, squareLength / 2, 0] + - point 2: [ squareLength / 2, -squareLength / 2, 0] + - point 3: [-squareLength / 2, -squareLength / 2, 0] +@param rvec Rotation vector used to initialize an iterative PnP refinement algorithm, when flag is SOLVEPNP_ITERATIVE +and useExtrinsicGuess is set to true. +@param tvec Translation vector used to initialize an iterative PnP refinement algorithm, when flag is SOLVEPNP_ITERATIVE +and useExtrinsicGuess is set to true. +@param reprojectionError Optional vector of reprojection error, that is the RMS error +(\f$ \text{RMSE} = \sqrt{\frac{\sum_{i}^{N} \left ( \hat{y_i} - y_i \right )^2}{N}} \f$) between the input image points +and the 3D object points projected with the estimated pose. + +The function estimates the object pose given a set of object points, their corresponding image +projections, as well as the camera matrix and the distortion coefficients, see the figure below +(more precisely, the X-axis of the camera frame is pointing to the right, the Y-axis downward +and the Z-axis forward). + +![](pnp.jpg) + +Points expressed in the world frame \f$ \bf{X}_w \f$ are projected into the image plane \f$ \left[ u, v \right] \f$ +using the perspective projection model \f$ \Pi \f$ and the camera intrinsic parameters matrix \f$ \bf{A} \f$: + +\f[ + \begin{align*} + \begin{bmatrix} + u \\ + v \\ + 1 + \end{bmatrix} &= + \bf{A} \hspace{0.1em} \Pi \hspace{0.2em} ^{c}\bf{T}_w + \begin{bmatrix} + X_{w} \\ + Y_{w} \\ + Z_{w} \\ + 1 + \end{bmatrix} \\ + \begin{bmatrix} + u \\ + v \\ + 1 + \end{bmatrix} &= + \begin{bmatrix} + f_x & 0 & c_x \\ + 0 & f_y & c_y \\ + 0 & 0 & 1 + \end{bmatrix} + \begin{bmatrix} + 1 & 0 & 0 & 0 \\ + 0 & 1 & 0 & 0 \\ + 0 & 0 & 1 & 0 + \end{bmatrix} + \begin{bmatrix} + r_{11} & r_{12} & r_{13} & t_x \\ + r_{21} & r_{22} & r_{23} & t_y \\ + r_{31} & r_{32} & r_{33} & t_z \\ + 0 & 0 & 0 & 1 + \end{bmatrix} + \begin{bmatrix} + X_{w} \\ + Y_{w} \\ + Z_{w} \\ + 1 + \end{bmatrix} + \end{align*} +\f] + +The estimated pose is thus the rotation (`rvec`) and the translation (`tvec`) vectors that allow transforming +a 3D point expressed in the world frame into the camera frame: + +\f[ + \begin{align*} + \begin{bmatrix} + X_c \\ + Y_c \\ + Z_c \\ + 1 + \end{bmatrix} &= + \hspace{0.2em} ^{c}\bf{T}_w + \begin{bmatrix} + X_{w} \\ + Y_{w} \\ + Z_{w} \\ + 1 + \end{bmatrix} \\ + \begin{bmatrix} + X_c \\ + Y_c \\ + Z_c \\ + 1 + \end{bmatrix} &= + \begin{bmatrix} + r_{11} & r_{12} & r_{13} & t_x \\ + r_{21} & r_{22} & r_{23} & t_y \\ + r_{31} & r_{32} & r_{33} & t_z \\ + 0 & 0 & 0 & 1 + \end{bmatrix} + \begin{bmatrix} + X_{w} \\ + Y_{w} \\ + Z_{w} \\ + 1 + \end{bmatrix} + \end{align*} +\f] + +@note + - An example of how to use solvePnP for planar augmented reality can be found at + opencv_source_code/samples/python/plane_ar.py + - If you are using Python: + - Numpy array slices won't work as input because solvePnP requires contiguous + arrays (enforced by the assertion using cv::Mat::checkVector() around line 55 of + modules/calib3d/src/solvepnp.cpp version 2.4.9) + - The P3P algorithm requires image points to be in an array of shape (N,1,2) due + to its calling of cv::undistortPoints (around line 75 of modules/calib3d/src/solvepnp.cpp version 2.4.9) + which requires 2-channel information. + - Thus, given some data D = np.array(...) where D.shape = (N,M), in order to use a subset of + it as, e.g., imagePoints, one must effectively copy it into a new array: imagePoints = + np.ascontiguousarray(D[:,:2]).reshape((N,1,2)) + - The methods **SOLVEPNP_DLS** and **SOLVEPNP_UPNP** cannot be used as the current implementations are + unstable and sometimes give completely wrong results. If you pass one of these two + flags, **SOLVEPNP_EPNP** method will be used instead. + - The minimum number of points is 4 in the general case. In the case of **SOLVEPNP_P3P** and **SOLVEPNP_AP3P** + methods, it is required to use exactly 4 points (the first 3 points are used to estimate all the solutions + of the P3P problem, the last one is used to retain the best solution that minimizes the reprojection error). + - With **SOLVEPNP_ITERATIVE** method and `useExtrinsicGuess=true`, the minimum number of points is 3 (3 points + are sufficient to compute a pose but there are up to 4 solutions). The initial solution should be close to the + global solution to converge. + - With **SOLVEPNP_IPPE** input points must be >= 4 and object points must be coplanar. + - With **SOLVEPNP_IPPE_SQUARE** this is a special case suitable for marker pose estimation. + Number of input points must be 4. Object points must be defined in the following order: + - point 0: [-squareLength / 2, squareLength / 2, 0] + - point 1: [ squareLength / 2, squareLength / 2, 0] + - point 2: [ squareLength / 2, -squareLength / 2, 0] + - point 3: [-squareLength / 2, -squareLength / 2, 0] + */ +CV_EXPORTS_W int solvePnPGeneric( InputArray objectPoints, InputArray imagePoints, + InputArray cameraMatrix, InputArray distCoeffs, + OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs, + bool useExtrinsicGuess = false, SolvePnPMethod flags = SOLVEPNP_ITERATIVE, + InputArray rvec = noArray(), InputArray tvec = noArray(), + OutputArray reprojectionError = noArray() ); + /** @brief Finds an initial camera matrix from 3D-2D point correspondences. @param objectPoints Vector of vectors of the calibration pattern points in the calibration pattern @@ -635,14 +1392,14 @@ CV_EXPORTS_W Mat initCameraMatrix2D( InputArrayOfArrays objectPoints, @param image Source chessboard view. It must be an 8-bit grayscale or color image. @param patternSize Number of inner corners per a chessboard row and column -( patternSize = cvSize(points_per_row,points_per_colum) = cvSize(columns,rows) ). +( patternSize = cv::Size(points_per_row,points_per_colum) = cv::Size(columns,rows) ). @param corners Output array of detected corners. @param flags Various operation flags that can be zero or a combination of the following values: -- **CV_CALIB_CB_ADAPTIVE_THRESH** Use adaptive thresholding to convert the image to black +- **CALIB_CB_ADAPTIVE_THRESH** Use adaptive thresholding to convert the image to black and white, rather than a fixed threshold level (computed from the average image brightness). -- **CV_CALIB_CB_NORMALIZE_IMAGE** Normalize the image gamma with equalizeHist before +- **CALIB_CB_NORMALIZE_IMAGE** Normalize the image gamma with equalizeHist before applying fixed or adaptive thresholding. -- **CV_CALIB_CB_FILTER_QUADS** Use additional criteria (like contour area, perimeter, +- **CALIB_CB_FILTER_QUADS** Use additional criteria (like contour area, perimeter, square-like shape) to filter out false quads extracted at the contour retrieval stage. - **CALIB_CB_FAST_CHECK** Run a fast check on the image that looks for chessboard corners, and shortcut the call if none is found. This can drastically speed up the call in the @@ -671,7 +1428,7 @@ Sample usage of detecting and drawing chessboard corners: : if(patternfound) cornerSubPix(gray, corners, Size(11, 11), Size(-1, -1), - TermCriteria(CV_TERMCRIT_EPS + CV_TERMCRIT_ITER, 30, 0.1)); + TermCriteria(cv::TermCriteria::EPS + cv::TermCriteria::MAX_ITER, 30, 0.1)); drawChessboardCorners(img, patternsize, Mat(corners), patternfound); @endcode @@ -683,8 +1440,105 @@ square grouping and ordering algorithm fails. CV_EXPORTS_W bool findChessboardCorners( InputArray image, Size patternSize, OutputArray corners, int flags = CALIB_CB_ADAPTIVE_THRESH + CALIB_CB_NORMALIZE_IMAGE ); +/* + Checks whether the image contains chessboard of the specific size or not. + If yes, nonzero value is returned. +*/ +CV_EXPORTS_W bool checkChessboard(InputArray img, Size size); + +/** @brief Finds the positions of internal corners of the chessboard using a sector based approach. + +@param image Source chessboard view. It must be an 8-bit grayscale or color image. +@param patternSize Number of inner corners per a chessboard row and column +( patternSize = cv::Size(points_per_row,points_per_colum) = cv::Size(columns,rows) ). +@param corners Output array of detected corners. +@param flags Various operation flags that can be zero or a combination of the following values: +- **CALIB_CB_NORMALIZE_IMAGE** Normalize the image gamma with equalizeHist before detection. +- **CALIB_CB_EXHAUSTIVE** Run an exhaustive search to improve detection rate. +- **CALIB_CB_ACCURACY** Up sample input image to improve sub-pixel accuracy due to aliasing effects. +- **CALIB_CB_LARGER** The detected pattern is allowed to be larger than patternSize (see description). +- **CALIB_CB_MARKER** The detected pattern must have a marker (see description). +This should be used if an accurate camera calibration is required. +@param meta Optional output arrray of detected corners (CV_8UC1 and size = cv::Size(columns,rows)). +Each entry stands for one corner of the pattern and can have one of the following values: +- 0 = no meta data attached +- 1 = left-top corner of a black cell +- 2 = left-top corner of a white cell +- 3 = left-top corner of a black cell with a white marker dot +- 4 = left-top corner of a white cell with a black marker dot (pattern origin in case of markers otherwise first corner) + +The function is analog to findchessboardCorners but uses a localized radon +transformation approximated by box filters being more robust to all sort of +noise, faster on larger images and is able to directly return the sub-pixel +position of the internal chessboard corners. The Method is based on the paper +@cite duda2018 "Accurate Detection and Localization of Checkerboard Corners for +Calibration" demonstrating that the returned sub-pixel positions are more +accurate than the one returned by cornerSubPix allowing a precise camera +calibration for demanding applications. + +In the case, the flags **CALIB_CB_LARGER** or **CALIB_CB_MARKER** are given, +the result can be recovered from the optional meta array. Both flags are +helpful to use calibration patterns exceeding the field of view of the camera. +These oversized patterns allow more accurate calibrations as corners can be +utilized, which are as close as possible to the image borders. For a +consistent coordinate system across all images, the optional marker (see image +below) can be used to move the origin of the board to the location where the +black circle is located. + +@note The function requires a white boarder with roughly the same width as one +of the checkerboard fields around the whole board to improve the detection in +various environments. In addition, because of the localized radon +transformation it is beneficial to use round corners for the field corners +which are located on the outside of the board. The following figure illustrates +a sample checkerboard optimized for the detection. However, any other checkerboard +can be used as well. +![Checkerboard](pics/checkerboard_radon.png) + */ +CV_EXPORTS_AS(findChessboardCornersSBWithMeta) +bool findChessboardCornersSB(InputArray image,Size patternSize, OutputArray corners, + int flags,OutputArray meta); +/** @overload */ +CV_EXPORTS_W inline +bool findChessboardCornersSB(InputArray image, Size patternSize, OutputArray corners, + int flags = 0) +{ + return findChessboardCornersSB(image, patternSize, corners, flags, noArray()); +} + +/** @brief Estimates the sharpness of a detected chessboard. + +Image sharpness, as well as brightness, are a critical parameter for accuracte +camera calibration. For accessing these parameters for filtering out +problematic calibraiton images, this method calculates edge profiles by traveling from +black to white chessboard cell centers. Based on this, the number of pixels is +calculated required to transit from black to white. This width of the +transition area is a good indication of how sharp the chessboard is imaged +and should be below ~3.0 pixels. + +@param image Gray image used to find chessboard corners +@param patternSize Size of a found chessboard pattern +@param corners Corners found by findChessboardCorners(SB) +@param rise_distance Rise distance 0.8 means 10% ... 90% of the final signal strength +@param vertical By default edge responses for horizontal lines are calculated +@param sharpness Optional output array with a sharpness value for calculated edge responses (see description) + +The optional sharpness array is of type CV_32FC1 and has for each calculated +profile one row with the following five entries: +* 0 = x coordinate of the underlying edge in the image +* 1 = y coordinate of the underlying edge in the image +* 2 = width of the transition area (sharpness) +* 3 = signal strength in the black cell (min brightness) +* 4 = signal strength in the white cell (max brightness) + +@return Scalar(average sharpness, average min brightness, average max brightness,0) +*/ +CV_EXPORTS_W Scalar estimateChessboardSharpness(InputArray image, Size patternSize, InputArray corners, + float rise_distance=0.8F,bool vertical=false, + OutputArray sharpness=noArray()); + + //! finds subpixel-accurate positions of the chessboard corners -CV_EXPORTS bool find4QuadCornerSubpix( InputArray img, InputOutputArray corners, Size region_size ); +CV_EXPORTS_W bool find4QuadCornerSubpix( InputArray img, InputOutputArray corners, Size region_size ); /** @brief Renders the detected chessboard corners. @@ -701,6 +1555,57 @@ found, or as colored corners connected with lines if the board was found. CV_EXPORTS_W void drawChessboardCorners( InputOutputArray image, Size patternSize, InputArray corners, bool patternWasFound ); +/** @brief Draw axes of the world/object coordinate system from pose estimation. @sa solvePnP + +@param image Input/output image. It must have 1 or 3 channels. The number of channels is not altered. +@param cameraMatrix Input 3x3 floating-point matrix of camera intrinsic parameters. +\f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ +@param distCoeffs Input vector of distortion coefficients +\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of +4, 5, 8, 12 or 14 elements. If the vector is empty, the zero distortion coefficients are assumed. +@param rvec Rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from +the model coordinate system to the camera coordinate system. +@param tvec Translation vector. +@param length Length of the painted axes in the same unit than tvec (usually in meters). +@param thickness Line thickness of the painted axes. + +This function draws the axes of the world/object coordinate system w.r.t. to the camera frame. +OX is drawn in red, OY in green and OZ in blue. + */ +CV_EXPORTS_W void drawFrameAxes(InputOutputArray image, InputArray cameraMatrix, InputArray distCoeffs, + InputArray rvec, InputArray tvec, float length, int thickness=3); + +struct CV_EXPORTS_W_SIMPLE CirclesGridFinderParameters +{ + CV_WRAP CirclesGridFinderParameters(); + CV_PROP_RW cv::Size2f densityNeighborhoodSize; + CV_PROP_RW float minDensity; + CV_PROP_RW int kmeansAttempts; + CV_PROP_RW int minDistanceToAddKeypoint; + CV_PROP_RW int keypointScale; + CV_PROP_RW float minGraphConfidence; + CV_PROP_RW float vertexGain; + CV_PROP_RW float vertexPenalty; + CV_PROP_RW float existingVertexGain; + CV_PROP_RW float edgeGain; + CV_PROP_RW float edgePenalty; + CV_PROP_RW float convexHullFactor; + CV_PROP_RW float minRNGEdgeSwitchDist; + + enum GridType + { + SYMMETRIC_GRID, ASYMMETRIC_GRID + }; + GridType gridType; + + CV_PROP_RW float squareSize; //!< Distance between two adjacent points. Used by CALIB_CB_CLUSTERING. + CV_PROP_RW float maxRectifiedDistance; //!< Max deviation from prediction. Used by CALIB_CB_CLUSTERING. +}; + +#ifndef DISABLE_OPENCV_3_COMPATIBILITY +typedef CirclesGridFinderParameters CirclesGridFinderParameters2; +#endif + /** @brief Finds centers in the grid of circles. @param image grid view of input circles; it must be an 8-bit grayscale or color image. @@ -713,6 +1618,7 @@ CV_EXPORTS_W void drawChessboardCorners( InputOutputArray image, Size patternSiz - **CALIB_CB_CLUSTERING** uses a special algorithm for grid detection. It is more robust to perspective distortions but much more sensitive to background clutter. @param blobDetector feature detector that finds blobs like dark circles on light background. +@param parameters struct for finding circles in a grid pattern. The function attempts to determine whether the input image contains a grid of circles. If it is, the function locates centers of the circles. The function returns a non-zero value if all of the centers @@ -732,60 +1638,78 @@ Sample usage of detecting and drawing the centers of circles: : @note The function requires white space (like a square-thick border, the wider the better) around the board to make the detection more robust in various environments. */ +CV_EXPORTS_W bool findCirclesGrid( InputArray image, Size patternSize, + OutputArray centers, int flags, + const Ptr &blobDetector, + const CirclesGridFinderParameters& parameters); + +/** @overload */ CV_EXPORTS_W bool findCirclesGrid( InputArray image, Size patternSize, OutputArray centers, int flags = CALIB_CB_SYMMETRIC_GRID, const Ptr &blobDetector = SimpleBlobDetector::create()); -/** @brief Finds the camera intrinsic and extrinsic parameters from several views of a calibration pattern. +/** @brief Finds the camera intrinsic and extrinsic parameters from several views of a calibration +pattern. @param objectPoints In the new interface it is a vector of vectors of calibration pattern points in the calibration pattern coordinate space (e.g. std::vector>). The outer -vector contains as many elements as the number of the pattern views. If the same calibration pattern +vector contains as many elements as the number of pattern views. If the same calibration pattern is shown in each view and it is fully visible, all the vectors will be the same. Although, it is -possible to use partially occluded patterns, or even different patterns in different views. Then, -the vectors will be different. The points are 3D, but since they are in a pattern coordinate system, -then, if the rig is planar, it may make sense to put the model to a XY coordinate plane so that -Z-coordinate of each input object point is 0. +possible to use partially occluded patterns or even different patterns in different views. Then, +the vectors will be different. Although the points are 3D, they all lie in the calibration pattern's +XY coordinate plane (thus 0 in the Z-coordinate), if the used calibration pattern is a planar rig. In the old interface all the vectors of object points from different views are concatenated together. @param imagePoints In the new interface it is a vector of vectors of the projections of calibration pattern points (e.g. std::vector>). imagePoints.size() and -objectPoints.size() and imagePoints[i].size() must be equal to objectPoints[i].size() for each i. -In the old interface all the vectors of object points from different views are concatenated -together. +objectPoints.size(), and imagePoints[i].size() and objectPoints[i].size() for each i, must be equal, +respectively. In the old interface all the vectors of object points from different views are +concatenated together. @param imageSize Size of the image used only to initialize the intrinsic camera matrix. -@param cameraMatrix Output 3x3 floating-point camera matrix +@param cameraMatrix Input/output 3x3 floating-point camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . If CV\_CALIB\_USE\_INTRINSIC\_GUESS -and/or CV_CALIB_FIX_ASPECT_RATIO are specified, some or all of fx, fy, cx, cy must be +and/or CALIB_FIX_ASPECT_RATIO are specified, some or all of fx, fy, cx, cy must be initialized before calling the function. -@param distCoeffs Output vector of distortion coefficients +@param distCoeffs Input/output vector of distortion coefficients \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of 4, 5, 8, 12 or 14 elements. -@param rvecs Output vector of rotation vectors (see Rodrigues ) estimated for each pattern view -(e.g. std::vector>). That is, each k-th rotation vector together with the corresponding -k-th translation vector (see the next output parameter description) brings the calibration pattern -from the model coordinate space (in which object points are specified) to the world coordinate -space, that is, a real position of the calibration pattern in the k-th pattern view (k=0.. *M* -1). -@param tvecs Output vector of translation vectors estimated for each pattern view. +@param rvecs Output vector of rotation vectors (@ref Rodrigues ) estimated for each pattern view +(e.g. std::vector>). That is, each i-th rotation vector together with the corresponding +i-th translation vector (see the next output parameter description) brings the calibration pattern +from the object coordinate space (in which object points are specified) to the camera coordinate +space. In more technical terms, the tuple of the i-th rotation and translation vector performs +a change of basis from object coordinate space to camera coordinate space. Due to its duality, this +tuple is equivalent to the position of the calibration pattern with respect to the camera coordinate +space. +@param tvecs Output vector of translation vectors estimated for each pattern view, see parameter +describtion above. +@param stdDeviationsIntrinsics Output vector of standard deviations estimated for intrinsic +parameters. Order of deviations values: +\f$(f_x, f_y, c_x, c_y, k_1, k_2, p_1, p_2, k_3, k_4, k_5, k_6 , s_1, s_2, s_3, + s_4, \tau_x, \tau_y)\f$ If one of parameters is not estimated, it's deviation is equals to zero. +@param stdDeviationsExtrinsics Output vector of standard deviations estimated for extrinsic +parameters. Order of deviations values: \f$(R_0, T_0, \dotsc , R_{M - 1}, T_{M - 1})\f$ where M is +the number of pattern views. \f$R_i, T_i\f$ are concatenated 1x3 vectors. + @param perViewErrors Output vector of the RMS re-projection error estimated for each pattern view. @param flags Different flags that may be zero or a combination of the following values: -- **CV_CALIB_USE_INTRINSIC_GUESS** cameraMatrix contains valid initial values of +- **CALIB_USE_INTRINSIC_GUESS** cameraMatrix contains valid initial values of fx, fy, cx, cy that are optimized further. Otherwise, (cx, cy) is initially set to the image center ( imageSize is used), and focal distances are computed in a least-squares fashion. Note, that if intrinsic parameters are known, there is no need to use this function just to estimate extrinsic parameters. Use solvePnP instead. -- **CV_CALIB_FIX_PRINCIPAL_POINT** The principal point is not changed during the global +- **CALIB_FIX_PRINCIPAL_POINT** The principal point is not changed during the global optimization. It stays at the center or at a different location specified when -CV_CALIB_USE_INTRINSIC_GUESS is set too. -- **CV_CALIB_FIX_ASPECT_RATIO** The functions considers only fy as a free parameter. The +CALIB_USE_INTRINSIC_GUESS is set too. +- **CALIB_FIX_ASPECT_RATIO** The functions consider only fy as a free parameter. The ratio fx/fy stays the same as in the input cameraMatrix . When -CV_CALIB_USE_INTRINSIC_GUESS is not set, the actual input values of fx and fy are +CALIB_USE_INTRINSIC_GUESS is not set, the actual input values of fx and fy are ignored, only their ratio is computed and used further. -- **CV_CALIB_ZERO_TANGENT_DIST** Tangential distortion coefficients \f$(p_1, p_2)\f$ are set +- **CALIB_ZERO_TANGENT_DIST** Tangential distortion coefficients \f$(p_1, p_2)\f$ are set to zeros and stay zero. -- **CV_CALIB_FIX_K1,...,CV_CALIB_FIX_K6** The corresponding radial distortion -coefficient is not changed during the optimization. If CV_CALIB_USE_INTRINSIC_GUESS is +- **CALIB_FIX_K1,...,CALIB_FIX_K6** The corresponding radial distortion +coefficient is not changed during the optimization. If CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the supplied distCoeffs matrix is used. Otherwise, it is set to 0. -- **CV_CALIB_RATIONAL_MODEL** Coefficients k4, k5, and k6 are enabled. To provide the +- **CALIB_RATIONAL_MODEL** Coefficients k4, k5, and k6 are enabled. To provide the backward compatibility, this extra flag should be explicitly specified to make the calibration function use the rational model and return 8 coefficients. If the flag is not set, the function computes and returns only 5 distortion coefficients. @@ -794,24 +1718,26 @@ backward compatibility, this extra flag should be explicitly specified to make t calibration function use the thin prism model and return 12 coefficients. If the flag is not set, the function computes and returns only 5 distortion coefficients. - **CALIB_FIX_S1_S2_S3_S4** The thin prism distortion coefficients are not changed during -the optimization. If CV_CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the +the optimization. If CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the supplied distCoeffs matrix is used. Otherwise, it is set to 0. - **CALIB_TILTED_MODEL** Coefficients tauX and tauY are enabled. To provide the backward compatibility, this extra flag should be explicitly specified to make the calibration function use the tilted sensor model and return 14 coefficients. If the flag is not set, the function computes and returns only 5 distortion coefficients. - **CALIB_FIX_TAUX_TAUY** The coefficients of the tilted sensor model are not changed during -the optimization. If CV_CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the +the optimization. If CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the supplied distCoeffs matrix is used. Otherwise, it is set to 0. @param criteria Termination criteria for the iterative optimization algorithm. +@return the overall RMS re-projection error. + The function estimates the intrinsic camera parameters and extrinsic parameters for each of the views. The algorithm is based on @cite Zhang2000 and @cite BouguetMCT . The coordinates of 3D object points and their corresponding 2D projections in each view must be specified. That may be achieved -by using an object with a known geometry and easily detectable feature points. Such an object is +by using an object with known geometry and easily detectable feature points. Such an object is called a calibration rig or calibration pattern, and OpenCV has built-in support for a chessboard as -a calibration rig (see findChessboardCorners ). Currently, initialization of intrinsic parameters -(when CV_CALIB_USE_INTRINSIC_GUESS is not set) is only implemented for planar calibration +a calibration rig (see @ref findChessboardCorners). Currently, initialization of intrinsic +parameters (when CALIB_USE_INTRINSIC_GUESS is not set) is only implemented for planar calibration patterns (where Z-coordinates of the object points must be all zeros). 3D calibration rigs can also be used as long as initial cameraMatrix is provided. @@ -819,7 +1745,7 @@ The algorithm performs the following steps: - Compute the initial intrinsic parameters (the option only available for planar calibration patterns) or read them from the input parameters. The distortion coefficients are all set to - zeros initially unless some of CV_CALIB_FIX_K? are specified. + zeros initially unless some of CALIB_FIX_K? are specified. - Estimate the initial camera pose as if the intrinsic parameters have been already known. This is done using solvePnP . @@ -829,18 +1755,28 @@ The algorithm performs the following steps: the projected (using the current estimates for camera parameters and the poses) object points objectPoints. See projectPoints for details. -The function returns the final re-projection error. - @note - If you use a non-square (=non-NxN) grid and findChessboardCorners for calibration, and - calibrateCamera returns bad values (zero distortion coefficients, an image center very far from - (w/2-0.5,h/2-0.5), and/or large differences between \f$f_x\f$ and \f$f_y\f$ (ratios of 10:1 or more)), - then you have probably used patternSize=cvSize(rows,cols) instead of using - patternSize=cvSize(cols,rows) in findChessboardCorners . + If you use a non-square (i.e. non-N-by-N) grid and @ref findChessboardCorners for calibration, + and @ref calibrateCamera returns bad values (zero distortion coefficients, \f$c_x\f$ and + \f$c_y\f$ very far from the image center, and/or large differences between \f$f_x\f$ and + \f$f_y\f$ (ratios of 10:1 or more)), then you are probably using patternSize=cvSize(rows,cols) + instead of using patternSize=cvSize(cols,rows) in @ref findChessboardCorners. @sa - findChessboardCorners, solvePnP, initCameraMatrix2D, stereoCalibrate, undistort + calibrateCameraRO, findChessboardCorners, solvePnP, initCameraMatrix2D, stereoCalibrate, + undistort */ +CV_EXPORTS_AS(calibrateCameraExtended) double calibrateCamera( InputArrayOfArrays objectPoints, + InputArrayOfArrays imagePoints, Size imageSize, + InputOutputArray cameraMatrix, InputOutputArray distCoeffs, + OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs, + OutputArray stdDeviationsIntrinsics, + OutputArray stdDeviationsExtrinsics, + OutputArray perViewErrors, + int flags = 0, TermCriteria criteria = TermCriteria( + TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON) ); + +/** @overload */ CV_EXPORTS_W double calibrateCamera( InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints, Size imageSize, InputOutputArray cameraMatrix, InputOutputArray distCoeffs, @@ -848,6 +1784,84 @@ CV_EXPORTS_W double calibrateCamera( InputArrayOfArrays objectPoints, int flags = 0, TermCriteria criteria = TermCriteria( TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON) ); +/** @brief Finds the camera intrinsic and extrinsic parameters from several views of a calibration pattern. + +This function is an extension of calibrateCamera() with the method of releasing object which was +proposed in @cite strobl2011iccv. In many common cases with inaccurate, unmeasured, roughly planar +targets (calibration plates), this method can dramatically improve the precision of the estimated +camera parameters. Both the object-releasing method and standard method are supported by this +function. Use the parameter **iFixedPoint** for method selection. In the internal implementation, +calibrateCamera() is a wrapper for this function. + +@param objectPoints Vector of vectors of calibration pattern points in the calibration pattern +coordinate space. See calibrateCamera() for details. If the method of releasing object to be used, +the identical calibration board must be used in each view and it must be fully visible, and all +objectPoints[i] must be the same and all points should be roughly close to a plane. **The calibration +target has to be rigid, or at least static if the camera (rather than the calibration target) is +shifted for grabbing images.** +@param imagePoints Vector of vectors of the projections of calibration pattern points. See +calibrateCamera() for details. +@param imageSize Size of the image used only to initialize the intrinsic camera matrix. +@param iFixedPoint The index of the 3D object point in objectPoints[0] to be fixed. It also acts as +a switch for calibration method selection. If object-releasing method to be used, pass in the +parameter in the range of [1, objectPoints[0].size()-2], otherwise a value out of this range will +make standard calibration method selected. Usually the top-right corner point of the calibration +board grid is recommended to be fixed when object-releasing method being utilized. According to +\cite strobl2011iccv, two other points are also fixed. In this implementation, objectPoints[0].front +and objectPoints[0].back.z are used. With object-releasing method, accurate rvecs, tvecs and +newObjPoints are only possible if coordinates of these three fixed points are accurate enough. +@param cameraMatrix Output 3x3 floating-point camera matrix. See calibrateCamera() for details. +@param distCoeffs Output vector of distortion coefficients. See calibrateCamera() for details. +@param rvecs Output vector of rotation vectors estimated for each pattern view. See calibrateCamera() +for details. +@param tvecs Output vector of translation vectors estimated for each pattern view. +@param newObjPoints The updated output vector of calibration pattern points. The coordinates might +be scaled based on three fixed points. The returned coordinates are accurate only if the above +mentioned three fixed points are accurate. If not needed, noArray() can be passed in. This parameter +is ignored with standard calibration method. +@param stdDeviationsIntrinsics Output vector of standard deviations estimated for intrinsic parameters. +See calibrateCamera() for details. +@param stdDeviationsExtrinsics Output vector of standard deviations estimated for extrinsic parameters. +See calibrateCamera() for details. +@param stdDeviationsObjPoints Output vector of standard deviations estimated for refined coordinates +of calibration pattern points. It has the same size and order as objectPoints[0] vector. This +parameter is ignored with standard calibration method. + @param perViewErrors Output vector of the RMS re-projection error estimated for each pattern view. +@param flags Different flags that may be zero or a combination of some predefined values. See +calibrateCamera() for details. If the method of releasing object is used, the calibration time may +be much longer. CALIB_USE_QR or CALIB_USE_LU could be used for faster calibration with potentially +less precise and less stable in some rare cases. +@param criteria Termination criteria for the iterative optimization algorithm. + +@return the overall RMS re-projection error. + +The function estimates the intrinsic camera parameters and extrinsic parameters for each of the +views. The algorithm is based on @cite Zhang2000, @cite BouguetMCT and @cite strobl2011iccv. See +calibrateCamera() for other detailed explanations. +@sa + calibrateCamera, findChessboardCorners, solvePnP, initCameraMatrix2D, stereoCalibrate, undistort + */ +CV_EXPORTS_AS(calibrateCameraROExtended) double calibrateCameraRO( InputArrayOfArrays objectPoints, + InputArrayOfArrays imagePoints, Size imageSize, int iFixedPoint, + InputOutputArray cameraMatrix, InputOutputArray distCoeffs, + OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs, + OutputArray newObjPoints, + OutputArray stdDeviationsIntrinsics, + OutputArray stdDeviationsExtrinsics, + OutputArray stdDeviationsObjPoints, + OutputArray perViewErrors, + int flags = 0, TermCriteria criteria = TermCriteria( + TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON) ); + +/** @overload */ +CV_EXPORTS_W double calibrateCameraRO( InputArrayOfArrays objectPoints, + InputArrayOfArrays imagePoints, Size imageSize, int iFixedPoint, + InputOutputArray cameraMatrix, InputOutputArray distCoeffs, + OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs, + OutputArray newObjPoints, + int flags = 0, TermCriteria criteria = TermCriteria( + TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON) ); + /** @brief Computes useful camera characteristics from the camera matrix. @param cameraMatrix Input camera matrix that can be estimated by calibrateCamera or @@ -874,45 +1888,55 @@ CV_EXPORTS_W void calibrationMatrixValues( InputArray cameraMatrix, Size imageSi CV_OUT double& focalLength, CV_OUT Point2d& principalPoint, CV_OUT double& aspectRatio ); -/** @brief Calibrates the stereo camera. +/** @brief Calibrates a stereo camera set up. This function finds the intrinsic parameters +for each of the two cameras and the extrinsic parameters between the two cameras. -@param objectPoints Vector of vectors of the calibration pattern points. +@param objectPoints Vector of vectors of the calibration pattern points. The same structure as +in @ref calibrateCamera. For each pattern view, both cameras need to see the same object +points. Therefore, objectPoints.size(), imagePoints1.size(), and imagePoints2.size() need to be +equal as well as objectPoints[i].size(), imagePoints1[i].size(), and imagePoints2[i].size() need to +be equal for each i. @param imagePoints1 Vector of vectors of the projections of the calibration pattern points, -observed by the first camera. +observed by the first camera. The same structure as in @ref calibrateCamera. @param imagePoints2 Vector of vectors of the projections of the calibration pattern points, -observed by the second camera. -@param cameraMatrix1 Input/output first camera matrix: -\f$\vecthreethree{f_x^{(j)}}{0}{c_x^{(j)}}{0}{f_y^{(j)}}{c_y^{(j)}}{0}{0}{1}\f$ , \f$j = 0,\, 1\f$ . If -any of CV_CALIB_USE_INTRINSIC_GUESS , CV_CALIB_FIX_ASPECT_RATIO , -CV_CALIB_FIX_INTRINSIC , or CV_CALIB_FIX_FOCAL_LENGTH are specified, some or all of the -matrix components must be initialized. See the flags description for details. -@param distCoeffs1 Input/output vector of distortion coefficients -\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of -4, 5, 8, 12 or 14 elements. The output vector length depends on the flags. -@param cameraMatrix2 Input/output second camera matrix. The parameter is similar to cameraMatrix1 -@param distCoeffs2 Input/output lens distortion coefficients for the second camera. The parameter -is similar to distCoeffs1 . -@param imageSize Size of the image used only to initialize intrinsic camera matrix. -@param R Output rotation matrix between the 1st and the 2nd camera coordinate systems. -@param T Output translation vector between the coordinate systems of the cameras. +observed by the second camera. The same structure as in @ref calibrateCamera. +@param cameraMatrix1 Input/output camera matrix for the first camera, the same as in +@ref calibrateCamera. Furthermore, for the stereo case, additional flags may be used, see below. +@param distCoeffs1 Input/output vector of distortion coefficients, the same as in +@ref calibrateCamera. +@param cameraMatrix2 Input/output second camera matrix for the second camera. See description for +cameraMatrix1. +@param distCoeffs2 Input/output lens distortion coefficients for the second camera. See +description for distCoeffs1. +@param imageSize Size of the image used only to initialize the intrinsic camera matrices. +@param R Output rotation matrix. Together with the translation vector T, this matrix brings +points given in the first camera's coordinate system to points in the second camera's +coordinate system. In more technical terms, the tuple of R and T performs a change of basis +from the first camera's coordinate system to the second camera's coordinate system. Due to its +duality, this tuple is equivalent to the position of the first camera with respect to the +second camera coordinate system. +@param T Output translation vector, see description above. @param E Output essential matrix. @param F Output fundamental matrix. +@param perViewErrors Output vector of the RMS re-projection error estimated for each pattern view. @param flags Different flags that may be zero or a combination of the following values: -- **CV_CALIB_FIX_INTRINSIC** Fix cameraMatrix? and distCoeffs? so that only R, T, E , and F +- **CALIB_FIX_INTRINSIC** Fix cameraMatrix? and distCoeffs? so that only R, T, E, and F matrices are estimated. -- **CV_CALIB_USE_INTRINSIC_GUESS** Optimize some or all of the intrinsic parameters +- **CALIB_USE_INTRINSIC_GUESS** Optimize some or all of the intrinsic parameters according to the specified flags. Initial values are provided by the user. -- **CV_CALIB_FIX_PRINCIPAL_POINT** Fix the principal points during the optimization. -- **CV_CALIB_FIX_FOCAL_LENGTH** Fix \f$f^{(j)}_x\f$ and \f$f^{(j)}_y\f$ . -- **CV_CALIB_FIX_ASPECT_RATIO** Optimize \f$f^{(j)}_y\f$ . Fix the ratio \f$f^{(j)}_x/f^{(j)}_y\f$ +- **CALIB_USE_EXTRINSIC_GUESS** R and T contain valid initial values that are optimized further. +Otherwise R and T are initialized to the median value of the pattern views (each dimension separately). +- **CALIB_FIX_PRINCIPAL_POINT** Fix the principal points during the optimization. +- **CALIB_FIX_FOCAL_LENGTH** Fix \f$f^{(j)}_x\f$ and \f$f^{(j)}_y\f$ . +- **CALIB_FIX_ASPECT_RATIO** Optimize \f$f^{(j)}_y\f$ . Fix the ratio \f$f^{(j)}_x/f^{(j)}_y\f$ . -- **CV_CALIB_SAME_FOCAL_LENGTH** Enforce \f$f^{(0)}_x=f^{(1)}_x\f$ and \f$f^{(0)}_y=f^{(1)}_y\f$ . -- **CV_CALIB_ZERO_TANGENT_DIST** Set tangential distortion coefficients for each camera to +- **CALIB_SAME_FOCAL_LENGTH** Enforce \f$f^{(0)}_x=f^{(1)}_x\f$ and \f$f^{(0)}_y=f^{(1)}_y\f$ . +- **CALIB_ZERO_TANGENT_DIST** Set tangential distortion coefficients for each camera to zeros and fix there. -- **CV_CALIB_FIX_K1,...,CV_CALIB_FIX_K6** Do not change the corresponding radial -distortion coefficient during the optimization. If CV_CALIB_USE_INTRINSIC_GUESS is set, +- **CALIB_FIX_K1,...,CALIB_FIX_K6** Do not change the corresponding radial +distortion coefficient during the optimization. If CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the supplied distCoeffs matrix is used. Otherwise, it is set to 0. -- **CV_CALIB_RATIONAL_MODEL** Enable coefficients k4, k5, and k6. To provide the backward +- **CALIB_RATIONAL_MODEL** Enable coefficients k4, k5, and k6. To provide the backward compatibility, this extra flag should be explicitly specified to make the calibration function use the rational model and return 8 coefficients. If the flag is not set, the function computes and returns only 5 distortion coefficients. @@ -921,51 +1945,80 @@ backward compatibility, this extra flag should be explicitly specified to make t calibration function use the thin prism model and return 12 coefficients. If the flag is not set, the function computes and returns only 5 distortion coefficients. - **CALIB_FIX_S1_S2_S3_S4** The thin prism distortion coefficients are not changed during -the optimization. If CV_CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the +the optimization. If CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the supplied distCoeffs matrix is used. Otherwise, it is set to 0. - **CALIB_TILTED_MODEL** Coefficients tauX and tauY are enabled. To provide the backward compatibility, this extra flag should be explicitly specified to make the calibration function use the tilted sensor model and return 14 coefficients. If the flag is not set, the function computes and returns only 5 distortion coefficients. - **CALIB_FIX_TAUX_TAUY** The coefficients of the tilted sensor model are not changed during -the optimization. If CV_CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the +the optimization. If CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the supplied distCoeffs matrix is used. Otherwise, it is set to 0. @param criteria Termination criteria for the iterative optimization algorithm. -The function estimates transformation between two cameras making a stereo pair. If you have a stereo -camera where the relative position and orientation of two cameras is fixed, and if you computed -poses of an object relative to the first camera and to the second camera, (R1, T1) and (R2, T2), -respectively (this can be done with solvePnP ), then those poses definitely relate to each other. -This means that, given ( \f$R_1\f$,\f$T_1\f$ ), it should be possible to compute ( \f$R_2\f$,\f$T_2\f$ ). You only -need to know the position and orientation of the second camera relative to the first camera. This is -what the described function does. It computes ( \f$R\f$,\f$T\f$ ) so that: +The function estimates the transformation between two cameras making a stereo pair. If one computes +the poses of an object relative to the first camera and to the second camera, +( \f$R_1\f$,\f$T_1\f$ ) and (\f$R_2\f$,\f$T_2\f$), respectively, for a stereo camera where the +relative position and orientation between the two cameras are fixed, then those poses definitely +relate to each other. This means, if the relative position and orientation (\f$R\f$,\f$T\f$) of the +two cameras is known, it is possible to compute (\f$R_2\f$,\f$T_2\f$) when (\f$R_1\f$,\f$T_1\f$) is +given. This is what the described function does. It computes (\f$R\f$,\f$T\f$) such that: + +\f[R_2=R R_1\f] +\f[T_2=R T_1 + T.\f] + +Therefore, one can compute the coordinate representation of a 3D point for the second camera's +coordinate system when given the point's coordinate representation in the first camera's coordinate +system: + +\f[\begin{bmatrix} +X_2 \\ +Y_2 \\ +Z_2 \\ +1 +\end{bmatrix} = \begin{bmatrix} +R & T \\ +0 & 1 +\end{bmatrix} \begin{bmatrix} +X_1 \\ +Y_1 \\ +Z_1 \\ +1 +\end{bmatrix}.\f] -\f[R_2=R*R_1 -T_2=R*T_1 + T,\f] Optionally, it computes the essential matrix E: -\f[E= \vecthreethree{0}{-T_2}{T_1}{T_2}{0}{-T_0}{-T_1}{T_0}{0} *R\f] +\f[E= \vecthreethree{0}{-T_2}{T_1}{T_2}{0}{-T_0}{-T_1}{T_0}{0} R\f] -where \f$T_i\f$ are components of the translation vector \f$T\f$ : \f$T=[T_0, T_1, T_2]^T\f$ . And the function -can also compute the fundamental matrix F: +where \f$T_i\f$ are components of the translation vector \f$T\f$ : \f$T=[T_0, T_1, T_2]^T\f$ . +And the function can also compute the fundamental matrix F: \f[F = cameraMatrix2^{-T} E cameraMatrix1^{-1}\f] Besides the stereo-related information, the function can also perform a full calibration of each of -two cameras. However, due to the high dimensionality of the parameter space and noise in the input -data, the function can diverge from the correct solution. If the intrinsic parameters can be +the two cameras. However, due to the high dimensionality of the parameter space and noise in the +input data, the function can diverge from the correct solution. If the intrinsic parameters can be estimated with high accuracy for each of the cameras individually (for example, using -calibrateCamera ), you are recommended to do so and then pass CV_CALIB_FIX_INTRINSIC flag to the +calibrateCamera ), you are recommended to do so and then pass CALIB_FIX_INTRINSIC flag to the function along with the computed intrinsic parameters. Otherwise, if all the parameters are estimated at once, it makes sense to restrict some parameters, for example, pass -CV_CALIB_SAME_FOCAL_LENGTH and CV_CALIB_ZERO_TANGENT_DIST flags, which is usually a +CALIB_SAME_FOCAL_LENGTH and CALIB_ZERO_TANGENT_DIST flags, which is usually a reasonable assumption. -Similarly to calibrateCamera , the function minimizes the total re-projection error for all the +Similarly to calibrateCamera, the function minimizes the total re-projection error for all the points in all the available views from both cameras. The function returns the final value of the re-projection error. */ +CV_EXPORTS_AS(stereoCalibrateExtended) double stereoCalibrate( InputArrayOfArrays objectPoints, + InputArrayOfArrays imagePoints1, InputArrayOfArrays imagePoints2, + InputOutputArray cameraMatrix1, InputOutputArray distCoeffs1, + InputOutputArray cameraMatrix2, InputOutputArray distCoeffs2, + Size imageSize, InputOutputArray R,InputOutputArray T, OutputArray E, OutputArray F, + OutputArray perViewErrors, int flags = CALIB_FIX_INTRINSIC, + TermCriteria criteria = TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 30, 1e-6) ); + +/// @overload CV_EXPORTS_W double stereoCalibrate( InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints1, InputArrayOfArrays imagePoints2, InputOutputArray cameraMatrix1, InputOutputArray distCoeffs1, @@ -974,7 +2027,6 @@ CV_EXPORTS_W double stereoCalibrate( InputArrayOfArrays objectPoints, int flags = CALIB_FIX_INTRINSIC, TermCriteria criteria = TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 30, 1e-6) ); - /** @brief Computes rectification transforms for each head of a calibrated stereo camera. @param cameraMatrix1 First camera matrix. @@ -982,16 +2034,26 @@ CV_EXPORTS_W double stereoCalibrate( InputArrayOfArrays objectPoints, @param cameraMatrix2 Second camera matrix. @param distCoeffs2 Second camera distortion parameters. @param imageSize Size of the image used for stereo calibration. -@param R Rotation matrix between the coordinate systems of the first and the second cameras. -@param T Translation vector between coordinate systems of the cameras. -@param R1 Output 3x3 rectification transform (rotation matrix) for the first camera. -@param R2 Output 3x3 rectification transform (rotation matrix) for the second camera. +@param R Rotation matrix from the coordinate system of the first camera to the second camera, +see @ref stereoCalibrate. +@param T Translation vector from the coordinate system of the first camera to the second camera, +see @ref stereoCalibrate. +@param R1 Output 3x3 rectification transform (rotation matrix) for the first camera. This matrix +brings points given in the unrectified first camera's coordinate system to points in the rectified +first camera's coordinate system. In more technical terms, it performs a change of basis from the +unrectified first camera's coordinate system to the rectified first camera's coordinate system. +@param R2 Output 3x3 rectification transform (rotation matrix) for the second camera. This matrix +brings points given in the unrectified second camera's coordinate system to points in the rectified +second camera's coordinate system. In more technical terms, it performs a change of basis from the +unrectified second camera's coordinate system to the rectified second camera's coordinate system. @param P1 Output 3x4 projection matrix in the new (rectified) coordinate systems for the first -camera. +camera, i.e. it projects points given in the rectified first camera coordinate system into the +rectified first camera's image. @param P2 Output 3x4 projection matrix in the new (rectified) coordinate systems for the second -camera. -@param Q Output \f$4 \times 4\f$ disparity-to-depth mapping matrix (see reprojectImageTo3D ). -@param flags Operation flags that may be zero or CV_CALIB_ZERO_DISPARITY . If the flag is set, +camera, i.e. it projects points given in the rectified first camera coordinate system into the +rectified second camera's image. +@param Q Output \f$4 \times 4\f$ disparity-to-depth mapping matrix (see @ref reprojectImageTo3D). +@param flags Operation flags that may be zero or CALIB_ZERO_DISPARITY . If the flag is set, the function makes the principal points of each camera have the same pixel coordinates in the rectified views. And if the flag is not set, the function may still shift the images in the horizontal or vertical direction (depending on the orientation of epipolar lines) to maximize the @@ -1001,11 +2063,11 @@ scaling. Otherwise, the parameter should be between 0 and 1. alpha=0 means that images are zoomed and shifted so that only valid pixels are visible (no black areas after rectification). alpha=1 means that the rectified image is decimated and shifted so that all the pixels from the original images from the cameras are retained in the rectified images (no source -image pixels are lost). Obviously, any intermediate value yields an intermediate result between +image pixels are lost). Any intermediate value yields an intermediate result between those two extreme cases. @param newImageSize New image resolution after rectification. The same size should be passed to initUndistortRectifyMap (see the stereo_calib.cpp sample in OpenCV samples directory). When (0,0) -is passed (default), it is set to the original imageSize . Setting it to larger value can help you +is passed (default), it is set to the original imageSize . Setting it to a larger value can help you preserve details in the original image, especially when there is a big radial distortion. @param validPixROI1 Optional output rectangles inside the rectified images where all the pixels are valid. If alpha=0 , the ROIs cover the whole images. Otherwise, they are likely to be smaller @@ -1021,27 +2083,43 @@ as input. As output, it provides two rotation matrices and also two projection m coordinates. The function distinguishes the following two cases: - **Horizontal stereo**: the first and the second camera views are shifted relative to each other - mainly along the x axis (with possible small vertical shift). In the rectified images, the + mainly along the x-axis (with possible small vertical shift). In the rectified images, the corresponding epipolar lines in the left and right cameras are horizontal and have the same y-coordinate. P1 and P2 look like: - \f[\texttt{P1} = \begin{bmatrix} f & 0 & cx_1 & 0 \\ 0 & f & cy & 0 \\ 0 & 0 & 1 & 0 \end{bmatrix}\f] + \f[\texttt{P1} = \begin{bmatrix} + f & 0 & cx_1 & 0 \\ + 0 & f & cy & 0 \\ + 0 & 0 & 1 & 0 + \end{bmatrix}\f] - \f[\texttt{P2} = \begin{bmatrix} f & 0 & cx_2 & T_x*f \\ 0 & f & cy & 0 \\ 0 & 0 & 1 & 0 \end{bmatrix} ,\f] + \f[\texttt{P2} = \begin{bmatrix} + f & 0 & cx_2 & T_x*f \\ + 0 & f & cy & 0 \\ + 0 & 0 & 1 & 0 + \end{bmatrix} ,\f] where \f$T_x\f$ is a horizontal shift between the cameras and \f$cx_1=cx_2\f$ if - CV_CALIB_ZERO_DISPARITY is set. + CALIB_ZERO_DISPARITY is set. - **Vertical stereo**: the first and the second camera views are shifted relative to each other - mainly in vertical direction (and probably a bit in the horizontal direction too). The epipolar + mainly in the vertical direction (and probably a bit in the horizontal direction too). The epipolar lines in the rectified images are vertical and have the same x-coordinate. P1 and P2 look like: - \f[\texttt{P1} = \begin{bmatrix} f & 0 & cx & 0 \\ 0 & f & cy_1 & 0 \\ 0 & 0 & 1 & 0 \end{bmatrix}\f] + \f[\texttt{P1} = \begin{bmatrix} + f & 0 & cx & 0 \\ + 0 & f & cy_1 & 0 \\ + 0 & 0 & 1 & 0 + \end{bmatrix}\f] - \f[\texttt{P2} = \begin{bmatrix} f & 0 & cx & 0 \\ 0 & f & cy_2 & T_y*f \\ 0 & 0 & 1 & 0 \end{bmatrix} ,\f] + \f[\texttt{P2} = \begin{bmatrix} + f & 0 & cx & 0 \\ + 0 & f & cy_2 & T_y*f \\ + 0 & 0 & 1 & 0 + \end{bmatrix},\f] - where \f$T_y\f$ is a vertical shift between the cameras and \f$cy_1=cy_2\f$ if CALIB_ZERO_DISPARITY is - set. + where \f$T_y\f$ is a vertical shift between the cameras and \f$cy_1=cy_2\f$ if + CALIB_ZERO_DISPARITY is set. As you can see, the first three columns of P1 and P2 will effectively be the new "rectified" camera matrices. The matrices, together with R1 and R2 , can then be passed to initUndistortRectifyMap to @@ -1076,7 +2154,7 @@ findFundamentalMat . @param threshold Optional threshold used to filter out the outliers. If the parameter is greater than zero, all the point pairs that do not comply with the epipolar geometry (that is, the points for which \f$|\texttt{points2[i]}^T*\texttt{F}*\texttt{points1[i]}|>\texttt{threshold}\f$ ) are -rejected prior to computing the homographies. Otherwise,all the points are considered inliers. +rejected prior to computing the homographies. Otherwise, all the points are considered inliers. The function computes the rectification transformations without knowing intrinsic parameters of the cameras and their relative position in the space, which explains the suffix "uncalibrated". Another @@ -1120,7 +2198,7 @@ assumed. @param alpha Free scaling parameter between 0 (when all the pixels in the undistorted image are valid) and 1 (when all the source image pixels are retained in the undistorted image). See stereoRectify for details. -@param newImgSize Image size after rectification. By default,it is set to imageSize . +@param newImgSize Image size after rectification. By default, it is set to imageSize . @param validPixROI Optional output rectangle that outlines all-good-pixels region in the undistorted image. See roi1, roi2 description in stereoRectify . @param centerPrincipalPoint Optional flag that indicates whether in the new camera matrix the @@ -1131,7 +2209,7 @@ best fit a subset of the source image (determined by alpha) to the corrected ima The function computes and returns the optimal new camera matrix based on the free scaling parameter. By varying this parameter, you may retrieve only sensible pixels alpha=0 , keep all the original image pixels if there is valuable information in the corners alpha=1 , or get something in between. -When alpha\>0 , the undistortion result is likely to have some black pixels corresponding to +When alpha\>0 , the undistorted result is likely to have some black pixels corresponding to "virtual" pixels outside of the captured distorted image. The original camera matrix, distortion coefficients, the computed new camera matrix, and newImageSize should be passed to initUndistortRectifyMap to produce the maps for remap . @@ -1141,6 +2219,139 @@ CV_EXPORTS_W Mat getOptimalNewCameraMatrix( InputArray cameraMatrix, InputArray CV_OUT Rect* validPixROI = 0, bool centerPrincipalPoint = false); +/** @brief Computes Hand-Eye calibration: \f$_{}^{g}\textrm{T}_c\f$ + +@param[in] R_gripper2base Rotation part extracted from the homogeneous matrix that transforms a point +expressed in the gripper frame to the robot base frame (\f$_{}^{b}\textrm{T}_g\f$). +This is a vector (`vector`) that contains the rotation matrices for all the transformations +from gripper frame to robot base frame. +@param[in] t_gripper2base Translation part extracted from the homogeneous matrix that transforms a point +expressed in the gripper frame to the robot base frame (\f$_{}^{b}\textrm{T}_g\f$). +This is a vector (`vector`) that contains the translation vectors for all the transformations +from gripper frame to robot base frame. +@param[in] R_target2cam Rotation part extracted from the homogeneous matrix that transforms a point +expressed in the target frame to the camera frame (\f$_{}^{c}\textrm{T}_t\f$). +This is a vector (`vector`) that contains the rotation matrices for all the transformations +from calibration target frame to camera frame. +@param[in] t_target2cam Rotation part extracted from the homogeneous matrix that transforms a point +expressed in the target frame to the camera frame (\f$_{}^{c}\textrm{T}_t\f$). +This is a vector (`vector`) that contains the translation vectors for all the transformations +from calibration target frame to camera frame. +@param[out] R_cam2gripper Estimated rotation part extracted from the homogeneous matrix that transforms a point +expressed in the camera frame to the gripper frame (\f$_{}^{g}\textrm{T}_c\f$). +@param[out] t_cam2gripper Estimated translation part extracted from the homogeneous matrix that transforms a point +expressed in the camera frame to the gripper frame (\f$_{}^{g}\textrm{T}_c\f$). +@param[in] method One of the implemented Hand-Eye calibration method, see cv::HandEyeCalibrationMethod + +The function performs the Hand-Eye calibration using various methods. One approach consists in estimating the +rotation then the translation (separable solutions) and the following methods are implemented: + - R. Tsai, R. Lenz A New Technique for Fully Autonomous and Efficient 3D Robotics Hand/EyeCalibration \cite Tsai89 + - F. Park, B. Martin Robot Sensor Calibration: Solving AX = XB on the Euclidean Group \cite Park94 + - R. Horaud, F. Dornaika Hand-Eye Calibration \cite Horaud95 + +Another approach consists in estimating simultaneously the rotation and the translation (simultaneous solutions), +with the following implemented method: + - N. Andreff, R. Horaud, B. Espiau On-line Hand-Eye Calibration \cite Andreff99 + - K. Daniilidis Hand-Eye Calibration Using Dual Quaternions \cite Daniilidis98 + +The following picture describes the Hand-Eye calibration problem where the transformation between a camera ("eye") +mounted on a robot gripper ("hand") has to be estimated. + +![](pics/hand-eye_figure.png) + +The calibration procedure is the following: + - a static calibration pattern is used to estimate the transformation between the target frame + and the camera frame + - the robot gripper is moved in order to acquire several poses + - for each pose, the homogeneous transformation between the gripper frame and the robot base frame is recorded using for + instance the robot kinematics +\f[ + \begin{bmatrix} + X_b\\ + Y_b\\ + Z_b\\ + 1 + \end{bmatrix} + = + \begin{bmatrix} + _{}^{b}\textrm{R}_g & _{}^{b}\textrm{t}_g \\ + 0_{1 \times 3} & 1 + \end{bmatrix} + \begin{bmatrix} + X_g\\ + Y_g\\ + Z_g\\ + 1 + \end{bmatrix} +\f] + - for each pose, the homogeneous transformation between the calibration target frame and the camera frame is recorded using + for instance a pose estimation method (PnP) from 2D-3D point correspondences +\f[ + \begin{bmatrix} + X_c\\ + Y_c\\ + Z_c\\ + 1 + \end{bmatrix} + = + \begin{bmatrix} + _{}^{c}\textrm{R}_t & _{}^{c}\textrm{t}_t \\ + 0_{1 \times 3} & 1 + \end{bmatrix} + \begin{bmatrix} + X_t\\ + Y_t\\ + Z_t\\ + 1 + \end{bmatrix} +\f] + +The Hand-Eye calibration procedure returns the following homogeneous transformation +\f[ + \begin{bmatrix} + X_g\\ + Y_g\\ + Z_g\\ + 1 + \end{bmatrix} + = + \begin{bmatrix} + _{}^{g}\textrm{R}_c & _{}^{g}\textrm{t}_c \\ + 0_{1 \times 3} & 1 + \end{bmatrix} + \begin{bmatrix} + X_c\\ + Y_c\\ + Z_c\\ + 1 + \end{bmatrix} +\f] + +This problem is also known as solving the \f$\mathbf{A}\mathbf{X}=\mathbf{X}\mathbf{B}\f$ equation: +\f[ + \begin{align*} + ^{b}{\textrm{T}_g}^{(1)} \hspace{0.2em} ^{g}\textrm{T}_c \hspace{0.2em} ^{c}{\textrm{T}_t}^{(1)} &= + \hspace{0.1em} ^{b}{\textrm{T}_g}^{(2)} \hspace{0.2em} ^{g}\textrm{T}_c \hspace{0.2em} ^{c}{\textrm{T}_t}^{(2)} \\ + + (^{b}{\textrm{T}_g}^{(2)})^{-1} \hspace{0.2em} ^{b}{\textrm{T}_g}^{(1)} \hspace{0.2em} ^{g}\textrm{T}_c &= + \hspace{0.1em} ^{g}\textrm{T}_c \hspace{0.2em} ^{c}{\textrm{T}_t}^{(2)} (^{c}{\textrm{T}_t}^{(1)})^{-1} \\ + + \textrm{A}_i \textrm{X} &= \textrm{X} \textrm{B}_i \\ + \end{align*} +\f] + +\note +Additional information can be found on this [website](http://campar.in.tum.de/Chair/HandEyeCalibration). +\note +A minimum of 2 motions with non parallel rotation axes are necessary to determine the hand-eye transformation. +So at least 3 different poses are required, but it is strongly recommended to use many more poses. + + */ +CV_EXPORTS_W void calibrateHandEye( InputArrayOfArrays R_gripper2base, InputArrayOfArrays t_gripper2base, + InputArrayOfArrays R_target2cam, InputArrayOfArrays t_target2cam, + OutputArray R_cam2gripper, OutputArray t_cam2gripper, + HandEyeCalibrationMethod method=CALIB_HAND_EYE_TSAI ); + /** @brief Converts points from Euclidean to homogeneous space. @param src Input vector of N-dimensional points. @@ -1184,13 +2395,14 @@ floating-point (single or double precision). - **CV_FM_8POINT** for an 8-point algorithm. \f$N \ge 8\f$ - **CV_FM_RANSAC** for the RANSAC algorithm. \f$N \ge 8\f$ - **CV_FM_LMEDS** for the LMedS algorithm. \f$N \ge 8\f$ -@param param1 Parameter used for RANSAC. It is the maximum distance from a point to an epipolar +@param ransacReprojThreshold Parameter used only for RANSAC. It is the maximum distance from a point to an epipolar line in pixels, beyond which the point is considered an outlier and is not used for computing the final fundamental matrix. It can be set to something like 1-3, depending on the accuracy of the point localization, image resolution, and the image noise. -@param param2 Parameter used for the RANSAC or LMedS methods only. It specifies a desirable level +@param confidence Parameter used for the RANSAC and LMedS methods only. It specifies a desirable level of confidence (probability) that the estimated matrix is correct. @param mask +@param maxIters The maximum number of robust method iterations. The epipolar geometry is described by the following equation: @@ -1224,15 +2436,20 @@ stereoRectifyUncalibrated to compute the rectification transformation. : findFundamentalMat(points1, points2, FM_RANSAC, 3, 0.99); @endcode */ +CV_EXPORTS_W Mat findFundamentalMat( InputArray points1, InputArray points2, + int method, double ransacReprojThreshold, double confidence, + int maxIters, OutputArray mask = noArray() ); + +/** @overload */ CV_EXPORTS_W Mat findFundamentalMat( InputArray points1, InputArray points2, int method = FM_RANSAC, - double param1 = 3., double param2 = 0.99, + double ransacReprojThreshold = 3., double confidence = 0.99, OutputArray mask = noArray() ); /** @overload */ CV_EXPORTS Mat findFundamentalMat( InputArray points1, InputArray points2, OutputArray mask, int method = FM_RANSAC, - double param1 = 3., double param2 = 0.99 ); + double ransacReprojThreshold = 3., double confidence = 0.99 ); /** @brief Calculates an essential matrix from the corresponding points in two images. @@ -1242,9 +2459,9 @@ be floating-point (single or double precision). @param cameraMatrix Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . Note that this function assumes that points1 and points2 are feature points from cameras with the same camera matrix. -@param method Method for computing a fundamental matrix. +@param method Method for computing an essential matrix. - **RANSAC** for the RANSAC algorithm. -- **MEDS** for the LMedS algorithm. +- **LMEDS** for the LMedS algorithm. @param prob Parameter used for the RANSAC or LMedS methods only. It specifies a desirable level of confidence (probability) that the estimated matrix is correct. @param threshold Parameter used for RANSAC. It is the maximum distance from a point to an epipolar @@ -1273,8 +2490,8 @@ CV_EXPORTS_W Mat findEssentialMat( InputArray points1, InputArray points2, be floating-point (single or double precision). @param points2 Array of the second image points of the same size and format as points1 . @param focal focal length of the camera. Note that this function assumes that points1 and points2 -are feature points from cameras with same focal length and principle point. -@param pp principle point of the camera. +are feature points from cameras with same focal length and principal point. +@param pp principal point of the camera. @param method Method for computing a fundamental matrix. - **RANSAC** for the RANSAC algorithm. - **LMEDS** for the LMedS algorithm. @@ -1309,35 +2526,47 @@ CV_EXPORTS_W Mat findEssentialMat( InputArray points1, InputArray points2, @param R2 Another possible rotation matrix. @param t One possible translation. -This function decompose an essential matrix E using svd decomposition @cite HartleyZ00 . Generally 4 -possible poses exists for a given E. They are \f$[R_1, t]\f$, \f$[R_1, -t]\f$, \f$[R_2, t]\f$, \f$[R_2, -t]\f$. By -decomposing E, you can only get the direction of the translation, so the function returns unit t. +This function decomposes the essential matrix E using svd decomposition @cite HartleyZ00. In +general, four possible poses exist for the decomposition of E. They are \f$[R_1, t]\f$, +\f$[R_1, -t]\f$, \f$[R_2, t]\f$, \f$[R_2, -t]\f$. + +If E gives the epipolar constraint \f$[p_2; 1]^T A^{-T} E A^{-1} [p_1; 1] = 0\f$ between the image +points \f$p_1\f$ in the first image and \f$p_2\f$ in second image, then any of the tuples +\f$[R_1, t]\f$, \f$[R_1, -t]\f$, \f$[R_2, t]\f$, \f$[R_2, -t]\f$ is a change of basis from the first +camera's coordinate system to the second camera's coordinate system. However, by decomposing E, one +can only get the direction of the translation. For this reason, the translation t is returned with +unit length. */ CV_EXPORTS_W void decomposeEssentialMat( InputArray E, OutputArray R1, OutputArray R2, OutputArray t ); -/** @brief Recover relative camera rotation and translation from an estimated essential matrix and the -corresponding points in two images, using cheirality check. Returns the number of inliers which pass -the check. +/** @brief Recovers the relative camera rotation and the translation from an estimated essential +matrix and the corresponding points in two images, using cheirality check. Returns the number of +inliers that pass the check. @param E The input essential matrix. @param points1 Array of N 2D points from the first image. The point coordinates should be floating-point (single or double precision). @param points2 Array of the second image points of the same size and format as points1 . -@param cameraMatrix Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . +@param cameraMatrix Camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . Note that this function assumes that points1 and points2 are feature points from cameras with the same camera matrix. -@param R Recovered relative rotation. -@param t Recoverd relative translation. -@param mask Input/output mask for inliers in points1 and points2. -: If it is not empty, then it marks inliers in points1 and points2 for then given essential -matrix E. Only these inliers will be used to recover pose. In the output mask only inliers -which pass the cheirality check. -This function decomposes an essential matrix using decomposeEssentialMat and then verifies possible -pose hypotheses by doing cheirality check. The cheirality check basically means that the -triangulated 3D points should have positive depth. Some details can be found in @cite Nister03 . - -This function can be used to process output E and mask from findEssentialMat. In this scenario, -points1 and points2 are the same input for findEssentialMat. : +@param R Output rotation matrix. Together with the translation vector, this matrix makes up a tuple +that performs a change of basis from the first camera's coordinate system to the second camera's +coordinate system. Note that, in general, t can not be used for this tuple, see the parameter +described below. +@param t Output translation vector. This vector is obtained by @ref decomposeEssentialMat and +therefore is only known up to scale, i.e. t is the direction of the translation vector and has unit +length. +@param mask Input/output mask for inliers in points1 and points2. If it is not empty, then it marks +inliers in points1 and points2 for then given essential matrix E. Only these inliers will be used to +recover pose. In the output mask only inliers which pass the cheirality check. + +This function decomposes an essential matrix using @ref decomposeEssentialMat and then verifies +possible pose hypotheses by doing cheirality check. The cheirality check means that the +triangulated 3D points should have positive depth. Some details can be found in @cite Nister03. + +This function can be used to process the output E and mask from @ref findEssentialMat. In this +scenario, points1 and points2 are the same input for findEssentialMat.: @code // Example. Estimation of fundamental matrix using the RANSAC algorithm int point_count = 100; @@ -1369,20 +2598,24 @@ CV_EXPORTS_W int recoverPose( InputArray E, InputArray points1, InputArray point @param points1 Array of N 2D points from the first image. The point coordinates should be floating-point (single or double precision). @param points2 Array of the second image points of the same size and format as points1 . -@param R Recovered relative rotation. -@param t Recoverd relative translation. +@param R Output rotation matrix. Together with the translation vector, this matrix makes up a tuple +that performs a change of basis from the first camera's coordinate system to the second camera's +coordinate system. Note that, in general, t can not be used for this tuple, see the parameter +description below. +@param t Output translation vector. This vector is obtained by @ref decomposeEssentialMat and +therefore is only known up to scale, i.e. t is the direction of the translation vector and has unit +length. @param focal Focal length of the camera. Note that this function assumes that points1 and points2 -are feature points from cameras with same focal length and principle point. -@param pp Principle point of the camera. -@param mask Input/output mask for inliers in points1 and points2. -: If it is not empty, then it marks inliers in points1 and points2 for then given essential -matrix E. Only these inliers will be used to recover pose. In the output mask only inliers -which pass the cheirality check. +are feature points from cameras with same focal length and principal point. +@param pp principal point of the camera. +@param mask Input/output mask for inliers in points1 and points2. If it is not empty, then it marks +inliers in points1 and points2 for then given essential matrix E. Only these inliers will be used to +recover pose. In the output mask only inliers which pass the cheirality check. This function differs from the one above that it computes camera matrix from focal length and principal point: -\f[K = +\f[A = \begin{bmatrix} f & 0 & x_{pp} \\ 0 & f & y_{pp} \\ @@ -1394,6 +2627,35 @@ CV_EXPORTS_W int recoverPose( InputArray E, InputArray points1, InputArray point double focal = 1.0, Point2d pp = Point2d(0, 0), InputOutputArray mask = noArray() ); +/** @overload +@param E The input essential matrix. +@param points1 Array of N 2D points from the first image. The point coordinates should be +floating-point (single or double precision). +@param points2 Array of the second image points of the same size and format as points1. +@param cameraMatrix Camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . +Note that this function assumes that points1 and points2 are feature points from cameras with the +same camera matrix. +@param R Output rotation matrix. Together with the translation vector, this matrix makes up a tuple +that performs a change of basis from the first camera's coordinate system to the second camera's +coordinate system. Note that, in general, t can not be used for this tuple, see the parameter +description below. +@param t Output translation vector. This vector is obtained by @ref decomposeEssentialMat and +therefore is only known up to scale, i.e. t is the direction of the translation vector and has unit +length. +@param distanceThresh threshold distance which is used to filter out far away points (i.e. infinite +points). +@param mask Input/output mask for inliers in points1 and points2. If it is not empty, then it marks +inliers in points1 and points2 for then given essential matrix E. Only these inliers will be used to +recover pose. In the output mask only inliers which pass the cheirality check. +@param triangulatedPoints 3D points which were reconstructed by triangulation. + +This function differs from the one above that it outputs the triangulated 3D point that are used for +the cheirality check. + */ +CV_EXPORTS_W int recoverPose( InputArray E, InputArray points1, InputArray points2, + InputArray cameraMatrix, OutputArray R, OutputArray t, double distanceThresh, InputOutputArray mask = noArray(), + OutputArray triangulatedPoints = noArray()); + /** @brief For points in an image of a stereo pair, computes the corresponding epilines in the other image. @param points Input points. \f$N \times 1\f$ or \f$1 \times N\f$ matrix of type CV_32FC2 or @@ -1420,22 +2682,27 @@ Line coefficients are defined up to a scale. They are normalized so that \f$a_i^ CV_EXPORTS_W void computeCorrespondEpilines( InputArray points, int whichImage, InputArray F, OutputArray lines ); -/** @brief Reconstructs points by triangulation. +/** @brief This function reconstructs 3-dimensional points (in homogeneous coordinates) by using +their observations with a stereo camera. -@param projMatr1 3x4 projection matrix of the first camera. -@param projMatr2 3x4 projection matrix of the second camera. -@param projPoints1 2xN array of feature points in the first image. In case of c++ version it can -be also a vector of feature points or two-channel matrix of size 1xN or Nx1. -@param projPoints2 2xN array of corresponding points in the second image. In case of c++ version +@param projMatr1 3x4 projection matrix of the first camera, i.e. this matrix projects 3D points +given in the world's coordinate system into the first image. +@param projMatr2 3x4 projection matrix of the second camera, i.e. this matrix projects 3D points +given in the world's coordinate system into the second image. +@param projPoints1 2xN array of feature points in the first image. In the case of the c++ version, it can be also a vector of feature points or two-channel matrix of size 1xN or Nx1. -@param points4D 4xN array of reconstructed points in homogeneous coordinates. - -The function reconstructs 3-dimensional points (in homogeneous coordinates) by using their -observations with a stereo camera. Projections matrices can be obtained from stereoRectify. +@param projPoints2 2xN array of corresponding points in the second image. In the case of the c++ +version, it can be also a vector of feature points or two-channel matrix of size 1xN or Nx1. +@param points4D 4xN array of reconstructed points in homogeneous coordinates. These points are +returned in the world's coordinate system. @note Keep in mind that all input data should be of float type in order for this function to work. +@note + If the projection matrices from @ref stereoRectify are used, then the returned points are + represented in the first camera's rectified coordinate system. + @sa reprojectImageTo3D */ @@ -1480,7 +2747,7 @@ CV_EXPORTS_W void filterSpeckles( InputOutputArray img, double newVal, //! computes valid disparity ROI from the valid ROIs of the rectified images (that are returned by cv::stereoRectify()) CV_EXPORTS_W Rect getValidDisparityROI( Rect roi1, Rect roi2, int minDisparity, int numberOfDisparities, - int SADWindowSize ); + int blockSize ); //! validates disparity using the left-right check. The matrix "cost" should be computed by the stereo correspondence algorithm CV_EXPORTS_W void validateDisparity( InputOutputArray disparity, InputArray cost, @@ -1490,12 +2757,16 @@ CV_EXPORTS_W void validateDisparity( InputOutputArray disparity, InputArray cost /** @brief Reprojects a disparity image to 3D space. @param disparity Input single-channel 8-bit unsigned, 16-bit signed, 32-bit signed or 32-bit -floating-point disparity image. If 16-bit signed format is used, the values are assumed to have no -fractional bits. -@param _3dImage Output 3-channel floating-point image of the same size as disparity . Each -element of _3dImage(x,y) contains 3D coordinates of the point (x,y) computed from the disparity -map. -@param Q \f$4 \times 4\f$ perspective transformation matrix that can be obtained with stereoRectify. +floating-point disparity image. The values of 8-bit / 16-bit signed formats are assumed to have no +fractional bits. If the disparity is 16-bit signed format, as computed by @ref StereoBM or +@ref StereoSGBM and maybe other algorithms, it should be divided by 16 (and scaled to float) before +being used here. +@param _3dImage Output 3-channel floating-point image of the same size as disparity. Each element of +_3dImage(x,y) contains 3D coordinates of the point (x,y) computed from the disparity map. If one +uses Q obtained by @ref stereoRectify, then the returned points are represented in the first +camera's rectified coordinate system. +@param Q \f$4 \times 4\f$ perspective transformation matrix that can be obtained with +@ref stereoRectify. @param handleMissingValues Indicates, whether the function should handle missing values (i.e. points where the disparity was not computed). If handleMissingValues=true, then pixels with the minimal disparity that corresponds to the outliers (see StereoMatcher::compute ) are transformed @@ -1504,14 +2775,23 @@ to 3D points with a very large Z value (currently set to 10000). depth. ddepth can also be set to CV_16S, CV_32S or CV_32F. The function transforms a single-channel disparity map to a 3-channel image representing a 3D -surface. That is, for each pixel (x,y) andthe corresponding disparity d=disparity(x,y) , it +surface. That is, for each pixel (x,y) and the corresponding disparity d=disparity(x,y) , it computes: -\f[\begin{array}{l} [X \; Y \; Z \; W]^T = \texttt{Q} *[x \; y \; \texttt{disparity} (x,y) \; 1]^T \\ \texttt{\_3dImage} (x,y) = (X/W, \; Y/W, \; Z/W) \end{array}\f] +\f[\begin{bmatrix} +X \\ +Y \\ +Z \\ +W +\end{bmatrix} = Q \begin{bmatrix} +x \\ +y \\ +\texttt{disparity} (x,y) \\ +z +\end{bmatrix}.\f] -The matrix Q can be an arbitrary \f$4 \times 4\f$ matrix (for example, the one computed by -stereoRectify). To reproject a sparse set of points {(x,y,d),...} to 3D space, use -perspectiveTransform . +@sa + To reproject a sparse set of points {(x,y,d),...} to 3D space, use perspectiveTransform. */ CV_EXPORTS_W void reprojectImageTo3D( InputArray disparity, OutputArray _3dImage, InputArray Q, @@ -1520,21 +2800,62 @@ CV_EXPORTS_W void reprojectImageTo3D( InputArray disparity, /** @brief Calculates the Sampson Distance between two points. -The function sampsonDistance calculates and returns the first order approximation of the geometric error as: -\f[sd( \texttt{pt1} , \texttt{pt2} )= \frac{(\texttt{pt2}^t \cdot \texttt{F} \cdot \texttt{pt1})^2}{(\texttt{F} \cdot \texttt{pt1})(0) + (\texttt{F} \cdot \texttt{pt1})(1) + (\texttt{F}^t \cdot \texttt{pt2})(0) + (\texttt{F}^t \cdot \texttt{pt2})(1)}\f] -The fundamental matrix may be calculated using the cv::findFundamentalMat function. See HZ 11.4.3 for details. +The function cv::sampsonDistance calculates and returns the first order approximation of the geometric error as: +\f[ +sd( \texttt{pt1} , \texttt{pt2} )= +\frac{(\texttt{pt2}^t \cdot \texttt{F} \cdot \texttt{pt1})^2} +{((\texttt{F} \cdot \texttt{pt1})(0))^2 + +((\texttt{F} \cdot \texttt{pt1})(1))^2 + +((\texttt{F}^t \cdot \texttt{pt2})(0))^2 + +((\texttt{F}^t \cdot \texttt{pt2})(1))^2} +\f] +The fundamental matrix may be calculated using the cv::findFundamentalMat function. See @cite HartleyZ00 11.4.3 for details. @param pt1 first homogeneous 2d point @param pt2 second homogeneous 2d point @param F fundamental matrix +@return The computed Sampson distance. */ CV_EXPORTS_W double sampsonDistance(InputArray pt1, InputArray pt2, InputArray F); /** @brief Computes an optimal affine transformation between two 3D point sets. -@param src First input 3D point set. -@param dst Second input 3D point set. -@param out Output 3D affine transformation matrix \f$3 \times 4\f$ . -@param inliers Output vector indicating which points are inliers. +It computes +\f[ +\begin{bmatrix} +x\\ +y\\ +z\\ +\end{bmatrix} += +\begin{bmatrix} +a_{11} & a_{12} & a_{13}\\ +a_{21} & a_{22} & a_{23}\\ +a_{31} & a_{32} & a_{33}\\ +\end{bmatrix} +\begin{bmatrix} +X\\ +Y\\ +Z\\ +\end{bmatrix} ++ +\begin{bmatrix} +b_1\\ +b_2\\ +b_3\\ +\end{bmatrix} +\f] + +@param src First input 3D point set containing \f$(X,Y,Z)\f$. +@param dst Second input 3D point set containing \f$(x,y,z)\f$. +@param out Output 3D affine transformation matrix \f$3 \times 4\f$ of the form +\f[ +\begin{bmatrix} +a_{11} & a_{12} & a_{13} & b_1\\ +a_{21} & a_{22} & a_{23} & b_2\\ +a_{31} & a_{32} & a_{33} & b_3\\ +\end{bmatrix} +\f] +@param inliers Output vector indicating which points are inliers (1-inlier, 0-outlier). @param ransacThreshold Maximum reprojection error in the RANSAC algorithm to consider a point as an inlier. @param confidence Confidence level, between 0 and 1, for the estimated transformation. Anything @@ -1548,6 +2869,174 @@ CV_EXPORTS_W int estimateAffine3D(InputArray src, InputArray dst, OutputArray out, OutputArray inliers, double ransacThreshold = 3, double confidence = 0.99); +/** @brief Computes an optimal translation between two 3D point sets. + * + * It computes + * \f[ + * \begin{bmatrix} + * x\\ + * y\\ + * z\\ + * \end{bmatrix} + * = + * \begin{bmatrix} + * X\\ + * Y\\ + * Z\\ + * \end{bmatrix} + * + + * \begin{bmatrix} + * b_1\\ + * b_2\\ + * b_3\\ + * \end{bmatrix} + * \f] + * + * @param src First input 3D point set containing \f$(X,Y,Z)\f$. + * @param dst Second input 3D point set containing \f$(x,y,z)\f$. + * @param out Output 3D translation vector \f$3 \times 1\f$ of the form + * \f[ + * \begin{bmatrix} + * b_1 \\ + * b_2 \\ + * b_3 \\ + * \end{bmatrix} + * \f] + * @param inliers Output vector indicating which points are inliers (1-inlier, 0-outlier). + * @param ransacThreshold Maximum reprojection error in the RANSAC algorithm to consider a point as + * an inlier. + * @param confidence Confidence level, between 0 and 1, for the estimated transformation. Anything + * between 0.95 and 0.99 is usually good enough. Values too close to 1 can slow down the estimation + * significantly. Values lower than 0.8-0.9 can result in an incorrectly estimated transformation. + * + * The function estimates an optimal 3D translation between two 3D point sets using the + * RANSAC algorithm. + * */ +CV_EXPORTS_W int estimateTranslation3D(InputArray src, InputArray dst, + OutputArray out, OutputArray inliers, + double ransacThreshold = 3, double confidence = 0.99); + +/** @brief Computes an optimal affine transformation between two 2D point sets. + +It computes +\f[ +\begin{bmatrix} +x\\ +y\\ +\end{bmatrix} += +\begin{bmatrix} +a_{11} & a_{12}\\ +a_{21} & a_{22}\\ +\end{bmatrix} +\begin{bmatrix} +X\\ +Y\\ +\end{bmatrix} ++ +\begin{bmatrix} +b_1\\ +b_2\\ +\end{bmatrix} +\f] + +@param from First input 2D point set containing \f$(X,Y)\f$. +@param to Second input 2D point set containing \f$(x,y)\f$. +@param inliers Output vector indicating which points are inliers (1-inlier, 0-outlier). +@param method Robust method used to compute transformation. The following methods are possible: +- cv::RANSAC - RANSAC-based robust method +- cv::LMEDS - Least-Median robust method +RANSAC is the default method. +@param ransacReprojThreshold Maximum reprojection error in the RANSAC algorithm to consider +a point as an inlier. Applies only to RANSAC. +@param maxIters The maximum number of robust method iterations. +@param confidence Confidence level, between 0 and 1, for the estimated transformation. Anything +between 0.95 and 0.99 is usually good enough. Values too close to 1 can slow down the estimation +significantly. Values lower than 0.8-0.9 can result in an incorrectly estimated transformation. +@param refineIters Maximum number of iterations of refining algorithm (Levenberg-Marquardt). +Passing 0 will disable refining, so the output matrix will be output of robust method. + +@return Output 2D affine transformation matrix \f$2 \times 3\f$ or empty matrix if transformation +could not be estimated. The returned matrix has the following form: +\f[ +\begin{bmatrix} +a_{11} & a_{12} & b_1\\ +a_{21} & a_{22} & b_2\\ +\end{bmatrix} +\f] + +The function estimates an optimal 2D affine transformation between two 2D point sets using the +selected robust algorithm. + +The computed transformation is then refined further (using only inliers) with the +Levenberg-Marquardt method to reduce the re-projection error even more. + +@note +The RANSAC method can handle practically any ratio of outliers but needs a threshold to +distinguish inliers from outliers. The method LMeDS does not need any threshold but it works +correctly only when there are more than 50% of inliers. + +@sa estimateAffinePartial2D, getAffineTransform +*/ +CV_EXPORTS_W cv::Mat estimateAffine2D(InputArray from, InputArray to, OutputArray inliers = noArray(), + int method = RANSAC, double ransacReprojThreshold = 3, + size_t maxIters = 2000, double confidence = 0.99, + size_t refineIters = 10); + +/** @brief Computes an optimal limited affine transformation with 4 degrees of freedom between +two 2D point sets. + +@param from First input 2D point set. +@param to Second input 2D point set. +@param inliers Output vector indicating which points are inliers. +@param method Robust method used to compute transformation. The following methods are possible: +- cv::RANSAC - RANSAC-based robust method +- cv::LMEDS - Least-Median robust method +RANSAC is the default method. +@param ransacReprojThreshold Maximum reprojection error in the RANSAC algorithm to consider +a point as an inlier. Applies only to RANSAC. +@param maxIters The maximum number of robust method iterations. +@param confidence Confidence level, between 0 and 1, for the estimated transformation. Anything +between 0.95 and 0.99 is usually good enough. Values too close to 1 can slow down the estimation +significantly. Values lower than 0.8-0.9 can result in an incorrectly estimated transformation. +@param refineIters Maximum number of iterations of refining algorithm (Levenberg-Marquardt). +Passing 0 will disable refining, so the output matrix will be output of robust method. + +@return Output 2D affine transformation (4 degrees of freedom) matrix \f$2 \times 3\f$ or +empty matrix if transformation could not be estimated. + +The function estimates an optimal 2D affine transformation with 4 degrees of freedom limited to +combinations of translation, rotation, and uniform scaling. Uses the selected algorithm for robust +estimation. + +The computed transformation is then refined further (using only inliers) with the +Levenberg-Marquardt method to reduce the re-projection error even more. + +Estimated transformation matrix is: +\f[ \begin{bmatrix} \cos(\theta) \cdot s & -\sin(\theta) \cdot s & t_x \\ + \sin(\theta) \cdot s & \cos(\theta) \cdot s & t_y +\end{bmatrix} \f] +Where \f$ \theta \f$ is the rotation angle, \f$ s \f$ the scaling factor and \f$ t_x, t_y \f$ are +translations in \f$ x, y \f$ axes respectively. + +@note +The RANSAC method can handle practically any ratio of outliers but need a threshold to +distinguish inliers from outliers. The method LMeDS does not need any threshold but it works +correctly only when there are more than 50% of inliers. + +@sa estimateAffine2D, getAffineTransform +*/ +CV_EXPORTS_W cv::Mat estimateAffinePartial2D(InputArray from, InputArray to, OutputArray inliers = noArray(), + int method = RANSAC, double ransacReprojThreshold = 3, + size_t maxIters = 2000, double confidence = 0.99, + size_t refineIters = 10); + +/** @example samples/cpp/tutorial_code/features2D/Homography/decompose_homography.cpp +An example program with homography decomposition. + +Check @ref tutorial_homography "the corresponding tutorial" for more details. +*/ + /** @brief Decompose a homography matrix to rotation(s), translation(s) and plane normal(s). @param H The input homography matrix between two images. @@ -1556,11 +3045,19 @@ CV_EXPORTS_W int estimateAffine3D(InputArray src, InputArray dst, @param translations Array of translation matrices. @param normals Array of plane normal matrices. -This function extracts relative camera motion between two views observing a planar object from the -homography H induced by the plane. The intrinsic camera matrix K must also be provided. The function -may return up to four mathematical solution sets. At least two of the solutions may further be -invalidated if point correspondences are available by applying positive depth constraint (all points -must be in front of the camera). The decomposition method is described in detail in @cite Malis . +This function extracts relative camera motion between two views of a planar object and returns up to +four mathematical solution tuples of rotation, translation, and plane normal. The decomposition of +the homography matrix H is described in detail in @cite Malis. + +If the homography H, induced by the plane, gives the constraint +\f[s_i \vecthree{x'_i}{y'_i}{1} \sim H \vecthree{x_i}{y_i}{1}\f] on the source image points +\f$p_i\f$ and the destination image points \f$p'_i\f$, then the tuple of rotations[k] and +translations[k] is a change of basis from the source camera's coordinate system to the destination +camera's coordinate system. However, by decomposing H, one can only get the translation normalized +by the (typically unknown) depth of the scene, i.e. its direction but with normalized length. + +If point correspondences are available, at least two solutions may further be invalidated, by +applying positive depth constraint, i.e. all points must be in front of the camera. */ CV_EXPORTS_W int decomposeHomographyMat(InputArray H, InputArray K, @@ -1568,6 +3065,31 @@ CV_EXPORTS_W int decomposeHomographyMat(InputArray H, OutputArrayOfArrays translations, OutputArrayOfArrays normals); +/** @brief Filters homography decompositions based on additional information. + +@param rotations Vector of rotation matrices. +@param normals Vector of plane normal matrices. +@param beforePoints Vector of (rectified) visible reference points before the homography is applied +@param afterPoints Vector of (rectified) visible reference points after the homography is applied +@param possibleSolutions Vector of int indices representing the viable solution set after filtering +@param pointsMask optional Mat/Vector of 8u type representing the mask for the inliers as given by the findHomography function + +This function is intended to filter the output of the decomposeHomographyMat based on additional +information as described in @cite Malis . The summary of the method: the decomposeHomographyMat function +returns 2 unique solutions and their "opposites" for a total of 4 solutions. If we have access to the +sets of points visible in the camera frame before and after the homography transformation is applied, +we can determine which are the true potential solutions and which are the opposites by verifying which +homographies are consistent with all visible reference points being in front of the camera. The inputs +are left unchanged; the filtered solution set is returned as indices into the existing one. + +*/ +CV_EXPORTS_W void filterHomographyDecompByVisibleRefpoints(InputArrayOfArrays rotations, + InputArrayOfArrays normals, + InputArray beforePoints, + InputArray afterPoints, + OutputArray possibleSolutions, + InputArray pointsMask = noArray()); + /** @brief The base class for stereo correspondence algorithms. */ class CV_EXPORTS_W StereoMatcher : public Algorithm @@ -1683,7 +3205,8 @@ class CV_EXPORTS_W StereoSGBM : public StereoMatcher { MODE_SGBM = 0, MODE_HH = 1, - MODE_SGBM_3WAY = 2 + MODE_SGBM_3WAY = 2, + MODE_HH4 = 3 }; CV_WRAP virtual int getPreFilterCap() const = 0; @@ -1714,8 +3237,8 @@ class CV_EXPORTS_W StereoSGBM : public StereoMatcher the smoother the disparity is. P1 is the penalty on the disparity change by plus or minus 1 between neighbor pixels. P2 is the penalty on the disparity change by more than 1 between neighbor pixels. The algorithm requires P2 \> P1 . See stereo_match.cpp sample where some reasonably good - P1 and P2 values are shown (like 8\*number_of_image_channels\*SADWindowSize\*SADWindowSize and - 32\*number_of_image_channels\*SADWindowSize\*SADWindowSize , respectively). + P1 and P2 values are shown (like 8\*number_of_image_channels\*blockSize\*blockSize and + 32\*number_of_image_channels\*blockSize\*blockSize , respectively). @param disp12MaxDiff Maximum allowed difference (in integer pixel units) in the left-right disparity check. Set it to a non-positive value to disable the check. @param preFilterCap Truncation value for the prefiltered image pixels. The algorithm first @@ -1738,13 +3261,216 @@ class CV_EXPORTS_W StereoSGBM : public StereoMatcher set StereoSGBM::numDisparities at minimum. The second constructor enables you to set each parameter to a custom value. */ - CV_WRAP static Ptr create(int minDisparity, int numDisparities, int blockSize, + CV_WRAP static Ptr create(int minDisparity = 0, int numDisparities = 16, int blockSize = 3, int P1 = 0, int P2 = 0, int disp12MaxDiff = 0, int preFilterCap = 0, int uniquenessRatio = 0, int speckleWindowSize = 0, int speckleRange = 0, int mode = StereoSGBM::MODE_SGBM); }; + +//! cv::undistort mode +enum UndistortTypes +{ + PROJ_SPHERICAL_ORTHO = 0, + PROJ_SPHERICAL_EQRECT = 1 +}; + +/** @brief Transforms an image to compensate for lens distortion. + +The function transforms an image to compensate radial and tangential lens distortion. + +The function is simply a combination of #initUndistortRectifyMap (with unity R ) and #remap +(with bilinear interpolation). See the former function for details of the transformation being +performed. + +Those pixels in the destination image, for which there is no correspondent pixels in the source +image, are filled with zeros (black color). + +A particular subset of the source image that will be visible in the corrected image can be regulated +by newCameraMatrix. You can use #getOptimalNewCameraMatrix to compute the appropriate +newCameraMatrix depending on your requirements. + +The camera matrix and the distortion parameters can be determined using #calibrateCamera. If +the resolution of images is different from the resolution used at the calibration stage, \f$f_x, +f_y, c_x\f$ and \f$c_y\f$ need to be scaled accordingly, while the distortion coefficients remain +the same. + +@param src Input (distorted) image. +@param dst Output (corrected) image that has the same size and type as src . +@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . +@param distCoeffs Input vector of distortion coefficients +\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6[, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ +of 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are assumed. +@param newCameraMatrix Camera matrix of the distorted image. By default, it is the same as +cameraMatrix but you may additionally scale and shift the result by using a different matrix. + */ +CV_EXPORTS_W void undistort( InputArray src, OutputArray dst, + InputArray cameraMatrix, + InputArray distCoeffs, + InputArray newCameraMatrix = noArray() ); + +/** @brief Computes the undistortion and rectification transformation map. + +The function computes the joint undistortion and rectification transformation and represents the +result in the form of maps for remap. The undistorted image looks like original, as if it is +captured with a camera using the camera matrix =newCameraMatrix and zero distortion. In case of a +monocular camera, newCameraMatrix is usually equal to cameraMatrix, or it can be computed by +#getOptimalNewCameraMatrix for a better control over scaling. In case of a stereo camera, +newCameraMatrix is normally set to P1 or P2 computed by #stereoRectify . + +Also, this new camera is oriented differently in the coordinate space, according to R. That, for +example, helps to align two heads of a stereo camera so that the epipolar lines on both images +become horizontal and have the same y- coordinate (in case of a horizontally aligned stereo camera). + +The function actually builds the maps for the inverse mapping algorithm that is used by remap. That +is, for each pixel \f$(u, v)\f$ in the destination (corrected and rectified) image, the function +computes the corresponding coordinates in the source image (that is, in the original image from +camera). The following process is applied: +\f[ +\begin{array}{l} +x \leftarrow (u - {c'}_x)/{f'}_x \\ +y \leftarrow (v - {c'}_y)/{f'}_y \\ +{[X\,Y\,W]} ^T \leftarrow R^{-1}*[x \, y \, 1]^T \\ +x' \leftarrow X/W \\ +y' \leftarrow Y/W \\ +r^2 \leftarrow x'^2 + y'^2 \\ +x'' \leftarrow x' \frac{1 + k_1 r^2 + k_2 r^4 + k_3 r^6}{1 + k_4 r^2 + k_5 r^4 + k_6 r^6} ++ 2p_1 x' y' + p_2(r^2 + 2 x'^2) + s_1 r^2 + s_2 r^4\\ +y'' \leftarrow y' \frac{1 + k_1 r^2 + k_2 r^4 + k_3 r^6}{1 + k_4 r^2 + k_5 r^4 + k_6 r^6} ++ p_1 (r^2 + 2 y'^2) + 2 p_2 x' y' + s_3 r^2 + s_4 r^4 \\ +s\vecthree{x'''}{y'''}{1} = +\vecthreethree{R_{33}(\tau_x, \tau_y)}{0}{-R_{13}((\tau_x, \tau_y)} +{0}{R_{33}(\tau_x, \tau_y)}{-R_{23}(\tau_x, \tau_y)} +{0}{0}{1} R(\tau_x, \tau_y) \vecthree{x''}{y''}{1}\\ +map_x(u,v) \leftarrow x''' f_x + c_x \\ +map_y(u,v) \leftarrow y''' f_y + c_y +\end{array} +\f] +where \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6[, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ +are the distortion coefficients. + +In case of a stereo camera, this function is called twice: once for each camera head, after +stereoRectify, which in its turn is called after #stereoCalibrate. But if the stereo camera +was not calibrated, it is still possible to compute the rectification transformations directly from +the fundamental matrix using #stereoRectifyUncalibrated. For each camera, the function computes +homography H as the rectification transformation in a pixel domain, not a rotation matrix R in 3D +space. R can be computed from H as +\f[\texttt{R} = \texttt{cameraMatrix} ^{-1} \cdot \texttt{H} \cdot \texttt{cameraMatrix}\f] +where cameraMatrix can be chosen arbitrarily. + +@param cameraMatrix Input camera matrix \f$A=\vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . +@param distCoeffs Input vector of distortion coefficients +\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6[, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ +of 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are assumed. +@param R Optional rectification transformation in the object space (3x3 matrix). R1 or R2 , +computed by #stereoRectify can be passed here. If the matrix is empty, the identity transformation +is assumed. In cvInitUndistortMap R assumed to be an identity matrix. +@param newCameraMatrix New camera matrix \f$A'=\vecthreethree{f_x'}{0}{c_x'}{0}{f_y'}{c_y'}{0}{0}{1}\f$. +@param size Undistorted image size. +@param m1type Type of the first output map that can be CV_32FC1, CV_32FC2 or CV_16SC2, see #convertMaps +@param map1 The first output map. +@param map2 The second output map. + */ +CV_EXPORTS_W +void initUndistortRectifyMap(InputArray cameraMatrix, InputArray distCoeffs, + InputArray R, InputArray newCameraMatrix, + Size size, int m1type, OutputArray map1, OutputArray map2); + +//! initializes maps for #remap for wide-angle +CV_EXPORTS +float initWideAngleProjMap(InputArray cameraMatrix, InputArray distCoeffs, + Size imageSize, int destImageWidth, + int m1type, OutputArray map1, OutputArray map2, + enum UndistortTypes projType = PROJ_SPHERICAL_EQRECT, double alpha = 0); +static inline +float initWideAngleProjMap(InputArray cameraMatrix, InputArray distCoeffs, + Size imageSize, int destImageWidth, + int m1type, OutputArray map1, OutputArray map2, + int projType, double alpha = 0) +{ + return initWideAngleProjMap(cameraMatrix, distCoeffs, imageSize, destImageWidth, + m1type, map1, map2, (UndistortTypes)projType, alpha); +} + +/** @brief Returns the default new camera matrix. + +The function returns the camera matrix that is either an exact copy of the input cameraMatrix (when +centerPrinicipalPoint=false ), or the modified one (when centerPrincipalPoint=true). + +In the latter case, the new camera matrix will be: + +\f[\begin{bmatrix} f_x && 0 && ( \texttt{imgSize.width} -1)*0.5 \\ 0 && f_y && ( \texttt{imgSize.height} -1)*0.5 \\ 0 && 0 && 1 \end{bmatrix} ,\f] + +where \f$f_x\f$ and \f$f_y\f$ are \f$(0,0)\f$ and \f$(1,1)\f$ elements of cameraMatrix, respectively. + +By default, the undistortion functions in OpenCV (see #initUndistortRectifyMap, #undistort) do not +move the principal point. However, when you work with stereo, it is important to move the principal +points in both views to the same y-coordinate (which is required by most of stereo correspondence +algorithms), and may be to the same x-coordinate too. So, you can form the new camera matrix for +each view where the principal points are located at the center. + +@param cameraMatrix Input camera matrix. +@param imgsize Camera view image size in pixels. +@param centerPrincipalPoint Location of the principal point in the new camera matrix. The +parameter indicates whether this location should be at the image center or not. + */ +CV_EXPORTS_W +Mat getDefaultNewCameraMatrix(InputArray cameraMatrix, Size imgsize = Size(), + bool centerPrincipalPoint = false); + +/** @brief Computes the ideal point coordinates from the observed point coordinates. + +The function is similar to #undistort and #initUndistortRectifyMap but it operates on a +sparse set of points instead of a raster image. Also the function performs a reverse transformation +to projectPoints. In case of a 3D object, it does not reconstruct its 3D coordinates, but for a +planar object, it does, up to a translation vector, if the proper R is specified. + +For each observed point coordinate \f$(u, v)\f$ the function computes: +\f[ +\begin{array}{l} +x^{"} \leftarrow (u - c_x)/f_x \\ +y^{"} \leftarrow (v - c_y)/f_y \\ +(x',y') = undistort(x^{"},y^{"}, \texttt{distCoeffs}) \\ +{[X\,Y\,W]} ^T \leftarrow R*[x' \, y' \, 1]^T \\ +x \leftarrow X/W \\ +y \leftarrow Y/W \\ +\text{only performed if P is specified:} \\ +u' \leftarrow x {f'}_x + {c'}_x \\ +v' \leftarrow y {f'}_y + {c'}_y +\end{array} +\f] + +where *undistort* is an approximate iterative algorithm that estimates the normalized original +point coordinates out of the normalized distorted point coordinates ("normalized" means that the +coordinates do not depend on the camera matrix). + +The function can be used for both a stereo camera head or a monocular camera (when R is empty). +@param src Observed point coordinates, 2xN/Nx2 1-channel or 1xN/Nx1 2-channel (CV_32FC2 or CV_64FC2) (or +vector\ ). +@param dst Output ideal point coordinates (1xN/Nx1 2-channel or vector\ ) after undistortion and reverse perspective +transformation. If matrix P is identity or omitted, dst will contain normalized point coordinates. +@param cameraMatrix Camera matrix \f$\vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . +@param distCoeffs Input vector of distortion coefficients +\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6[, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ +of 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are assumed. +@param R Rectification transformation in the object space (3x3 matrix). R1 or R2 computed by +#stereoRectify can be passed here. If the matrix is empty, the identity transformation is used. +@param P New camera matrix (3x3) or new projection matrix (3x4) \f$\begin{bmatrix} {f'}_x & 0 & {c'}_x & t_x \\ 0 & {f'}_y & {c'}_y & t_y \\ 0 & 0 & 1 & t_z \end{bmatrix}\f$. P1 or P2 computed by +#stereoRectify can be passed here. If the matrix is empty, the identity new camera matrix is used. + */ +CV_EXPORTS_W +void undistortPoints(InputArray src, OutputArray dst, + InputArray cameraMatrix, InputArray distCoeffs, + InputArray R = noArray(), InputArray P = noArray()); +/** @overload + @note Default version of #undistortPoints does 5 iterations to compute undistorted points. + */ +CV_EXPORTS_AS(undistortPointsIter) +void undistortPoints(InputArray src, OutputArray dst, + InputArray cameraMatrix, InputArray distCoeffs, + InputArray R, InputArray P, TermCriteria criteria); + //! @} calib3d /** @brief The methods in this namespace use a so-called fisheye camera model. @@ -1756,15 +3482,16 @@ namespace fisheye //! @{ enum{ - CALIB_USE_INTRINSIC_GUESS = 1, - CALIB_RECOMPUTE_EXTRINSIC = 2, - CALIB_CHECK_COND = 4, - CALIB_FIX_SKEW = 8, - CALIB_FIX_K1 = 16, - CALIB_FIX_K2 = 32, - CALIB_FIX_K3 = 64, - CALIB_FIX_K4 = 128, - CALIB_FIX_INTRINSIC = 256 + CALIB_USE_INTRINSIC_GUESS = 1 << 0, + CALIB_RECOMPUTE_EXTRINSIC = 1 << 1, + CALIB_CHECK_COND = 1 << 2, + CALIB_FIX_SKEW = 1 << 3, + CALIB_FIX_K1 = 1 << 4, + CALIB_FIX_K2 = 1 << 5, + CALIB_FIX_K3 = 1 << 6, + CALIB_FIX_K4 = 1 << 7, + CALIB_FIX_INTRINSIC = 1 << 8, + CALIB_FIX_PRINCIPAL_POINT = 1 << 9 }; /** @brief Projects points using fisheye model @@ -1803,7 +3530,7 @@ namespace fisheye @param alpha The skew coefficient. @param distorted Output array of image points, 1xN/Nx1 2-channel, or vector\ . - Note that the function assumes the camera matrix of the undistorted points to be indentity. + Note that the function assumes the camera matrix of the undistorted points to be identity. This means if you want to transform back points undistorted with undistortPoints() you have to multiply them with \f$P^{-1}\f$. */ @@ -1848,7 +3575,7 @@ namespace fisheye @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$. @param Knew Camera matrix of the distorted image. By default, it is the identity matrix but you may additionally scale and shift the result by using a different matrix. - @param new_size + @param new_size the new size The function transforms an image to compensate radial and tangential lens distortion. @@ -1874,14 +3601,14 @@ namespace fisheye /** @brief Estimates new camera matrix for undistortion or rectification. @param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$. - @param image_size + @param image_size Size of the image @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$. @param R Rectification transformation in the object space: 3x3 1-channel, or vector: 3x1/1x3 1-channel or 1x1 3-channel @param P New camera matrix (3x3) or new projection matrix (3x4) @param balance Sets the new focal length in range between the min focal length and the max focal length. Balance is in range of [0, 1]. - @param new_size + @param new_size the new size @param fov_scale Divisor for new focal length. */ CV_EXPORTS_W void estimateNewCameraMatrixForUndistortRectify(InputArray K, InputArray D, const Size &image_size, InputArray R, @@ -1914,8 +3641,10 @@ namespace fisheye of intrinsic optimization. - **fisheye::CALIB_CHECK_COND** The functions will check validity of condition number. - **fisheye::CALIB_FIX_SKEW** Skew coefficient (alpha) is set to zero and stay zero. - - **fisheye::CALIB_FIX_K1..4** Selected distortion coefficients are set to zeros and stay - zero. + - **fisheye::CALIB_FIX_K1..fisheye::CALIB_FIX_K4** Selected distortion coefficients + are set to zeros and stay zero. + - **fisheye::CALIB_FIX_PRINCIPAL_POINT** The principal point is not changed during the global +optimization. It stays at the center or at a different location specified when CALIB_USE_INTRINSIC_GUESS is set too. @param criteria Termination criteria for the iterative optimization algorithm. */ CV_EXPORTS_W double calibrate(InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints, const Size& image_size, @@ -1939,7 +3668,7 @@ namespace fisheye @param P2 Output 3x4 projection matrix in the new (rectified) coordinate systems for the second camera. @param Q Output \f$4 \times 4\f$ disparity-to-depth mapping matrix (see reprojectImageTo3D ). - @param flags Operation flags that may be zero or CV_CALIB_ZERO_DISPARITY . If the flag is set, + @param flags Operation flags that may be zero or CALIB_ZERO_DISPARITY . If the flag is set, the function makes the principal points of each camera have the same pixel coordinates in the rectified views. And if the flag is not set, the function may still shift the images in the horizontal or vertical direction (depending on the orientation of epipolar lines) to maximize the @@ -1965,7 +3694,7 @@ namespace fisheye observed by the second camera. @param K1 Input/output first camera matrix: \f$\vecthreethree{f_x^{(j)}}{0}{c_x^{(j)}}{0}{f_y^{(j)}}{c_y^{(j)}}{0}{0}{1}\f$ , \f$j = 0,\, 1\f$ . If - any of fisheye::CALIB_USE_INTRINSIC_GUESS , fisheye::CV_CALIB_FIX_INTRINSIC are specified, + any of fisheye::CALIB_USE_INTRINSIC_GUESS , fisheye::CALIB_FIX_INTRINSIC are specified, some or all of the matrix components must be initialized. @param D1 Input/output vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$ of 4 elements. @param K2 Input/output second camera matrix. The parameter is similar to K1 . @@ -1975,7 +3704,7 @@ namespace fisheye @param R Output rotation matrix between the 1st and the 2nd camera coordinate systems. @param T Output translation vector between the coordinate systems of the cameras. @param flags Different flags that may be zero or a combination of the following values: - - **fisheye::CV_CALIB_FIX_INTRINSIC** Fix K1, K2? and D1, D2? so that only R, T matrices + - **fisheye::CALIB_FIX_INTRINSIC** Fix K1, K2? and D1, D2? so that only R, T matrices are estimated. - **fisheye::CALIB_USE_INTRINSIC_GUESS** K1, K2 contains valid initial values of fx, fy, cx, cy that are optimized further. Otherwise, (cx, cy) is initially set to the image @@ -1994,12 +3723,48 @@ namespace fisheye TermCriteria criteria = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 100, DBL_EPSILON)); //! @} calib3d_fisheye -} +} // end namespace fisheye -} // cv +} //end namespace cv -#ifndef DISABLE_OPENCV_24_COMPATIBILITY -#include "opencv2/calib3d/calib3d_c.h" +#if 0 //def __cplusplus +////////////////////////////////////////////////////////////////////////////////////////// +class CV_EXPORTS CvLevMarq +{ +public: + CvLevMarq(); + CvLevMarq( int nparams, int nerrs, CvTermCriteria criteria= + cvTermCriteria(cv::TermCriteria::EPS+cv::TermCriteria::MAX_ITER,30,DBL_EPSILON), + bool completeSymmFlag=false ); + ~CvLevMarq(); + void init( int nparams, int nerrs, CvTermCriteria criteria= + cvTermCriteria(cv::TermCriteria::EPS+cv::TermCriteria::MAX_ITER,30,DBL_EPSILON), + bool completeSymmFlag=false ); + bool update( const CvMat*& param, CvMat*& J, CvMat*& err ); + bool updateAlt( const CvMat*& param, CvMat*& JtJ, CvMat*& JtErr, double*& errNorm ); + + void clear(); + void step(); + enum { DONE=0, STARTED=1, CALC_J=2, CHECK_ERR=3 }; + + cv::Ptr mask; + cv::Ptr prevParam; + cv::Ptr param; + cv::Ptr J; + cv::Ptr err; + cv::Ptr JtJ; + cv::Ptr JtJN; + cv::Ptr JtErr; + cv::Ptr JtJV; + cv::Ptr JtJW; + double prevErrNorm, errNorm; + int lambdaLg10; + CvTermCriteria criteria; + int state; + int iters; + bool completeSymmFlag; + int solveMethod; +}; #endif #endif diff --git a/IPL/include/opencv/opencv2/calib3d/calib3d_c.h b/IPL/include/opencv/opencv2/calib3d/calib3d_c.h index 0e77aa8..959579c 100644 --- a/IPL/include/opencv/opencv2/calib3d/calib3d_c.h +++ b/IPL/include/opencv/opencv2/calib3d/calib3d_c.h @@ -41,44 +41,15 @@ // //M*/ -#ifndef __OPENCV_CALIB3D_C_H__ -#define __OPENCV_CALIB3D_C_H__ +#ifndef OPENCV_CALIB3D_C_H +#define OPENCV_CALIB3D_C_H -#include "opencv2/core/core_c.h" +#include "opencv2/core/types_c.h" #ifdef __cplusplus extern "C" { #endif -/** @addtogroup calib3d_c - @{ - */ - -/****************************************************************************************\ -* Camera Calibration, Pose Estimation and Stereo * -\****************************************************************************************/ - -typedef struct CvPOSITObject CvPOSITObject; - -/* Allocates and initializes CvPOSITObject structure before doing cvPOSIT */ -CVAPI(CvPOSITObject*) cvCreatePOSITObject( CvPoint3D32f* points, int point_count ); - - -/* Runs POSIT (POSe from ITeration) algorithm for determining 3d position of - an object given its model and projection in a weak-perspective case */ -CVAPI(void) cvPOSIT( CvPOSITObject* posit_object, CvPoint2D32f* image_points, - double focal_length, CvTermCriteria criteria, - float* rotation_matrix, float* translation_vector); - -/* Releases CvPOSITObject structure */ -CVAPI(void) cvReleasePOSITObject( CvPOSITObject** posit_object ); - -/* updates the number of RANSAC iterations */ -CVAPI(int) cvRANSACUpdateNumIters( double p, double err_prob, - int model_points, int max_iters ); - -CVAPI(void) cvConvertPointsHomogeneous( const CvMat* src, CvMat* dst ); - /* Calculates fundamental matrix given a set of corresponding points */ #define CV_FM_7POINT 1 #define CV_FM_8POINT 2 @@ -99,136 +70,11 @@ enum CV_DLS = 3 // Joel A. Hesch and Stergios I. Roumeliotis. "A Direct Least-Squares (DLS) Method for PnP" }; -CVAPI(int) cvFindFundamentalMat( const CvMat* points1, const CvMat* points2, - CvMat* fundamental_matrix, - int method CV_DEFAULT(CV_FM_RANSAC), - double param1 CV_DEFAULT(3.), double param2 CV_DEFAULT(0.99), - CvMat* status CV_DEFAULT(NULL) ); - -/* For each input point on one of images - computes parameters of the corresponding - epipolar line on the other image */ -CVAPI(void) cvComputeCorrespondEpilines( const CvMat* points, - int which_image, - const CvMat* fundamental_matrix, - CvMat* correspondent_lines ); - -/* Triangulation functions */ - -CVAPI(void) cvTriangulatePoints(CvMat* projMatr1, CvMat* projMatr2, - CvMat* projPoints1, CvMat* projPoints2, - CvMat* points4D); - -CVAPI(void) cvCorrectMatches(CvMat* F, CvMat* points1, CvMat* points2, - CvMat* new_points1, CvMat* new_points2); - - -/* Computes the optimal new camera matrix according to the free scaling parameter alpha: - alpha=0 - only valid pixels will be retained in the undistorted image - alpha=1 - all the source image pixels will be retained in the undistorted image -*/ -CVAPI(void) cvGetOptimalNewCameraMatrix( const CvMat* camera_matrix, - const CvMat* dist_coeffs, - CvSize image_size, double alpha, - CvMat* new_camera_matrix, - CvSize new_imag_size CV_DEFAULT(cvSize(0,0)), - CvRect* valid_pixel_ROI CV_DEFAULT(0), - int center_principal_point CV_DEFAULT(0)); - -/* Converts rotation vector to rotation matrix or vice versa */ -CVAPI(int) cvRodrigues2( const CvMat* src, CvMat* dst, - CvMat* jacobian CV_DEFAULT(0) ); - -/* Finds perspective transformation between the object plane and image (view) plane */ -CVAPI(int) cvFindHomography( const CvMat* src_points, - const CvMat* dst_points, - CvMat* homography, - int method CV_DEFAULT(0), - double ransacReprojThreshold CV_DEFAULT(3), - CvMat* mask CV_DEFAULT(0), - int maxIters CV_DEFAULT(2000), - double confidence CV_DEFAULT(0.995)); - -/* Computes RQ decomposition for 3x3 matrices */ -CVAPI(void) cvRQDecomp3x3( const CvMat *matrixM, CvMat *matrixR, CvMat *matrixQ, - CvMat *matrixQx CV_DEFAULT(NULL), - CvMat *matrixQy CV_DEFAULT(NULL), - CvMat *matrixQz CV_DEFAULT(NULL), - CvPoint3D64f *eulerAngles CV_DEFAULT(NULL)); - -/* Computes projection matrix decomposition */ -CVAPI(void) cvDecomposeProjectionMatrix( const CvMat *projMatr, CvMat *calibMatr, - CvMat *rotMatr, CvMat *posVect, - CvMat *rotMatrX CV_DEFAULT(NULL), - CvMat *rotMatrY CV_DEFAULT(NULL), - CvMat *rotMatrZ CV_DEFAULT(NULL), - CvPoint3D64f *eulerAngles CV_DEFAULT(NULL)); - -/* Computes d(AB)/dA and d(AB)/dB */ -CVAPI(void) cvCalcMatMulDeriv( const CvMat* A, const CvMat* B, CvMat* dABdA, CvMat* dABdB ); - -/* Computes r3 = rodrigues(rodrigues(r2)*rodrigues(r1)), - t3 = rodrigues(r2)*t1 + t2 and the respective derivatives */ -CVAPI(void) cvComposeRT( const CvMat* _rvec1, const CvMat* _tvec1, - const CvMat* _rvec2, const CvMat* _tvec2, - CvMat* _rvec3, CvMat* _tvec3, - CvMat* dr3dr1 CV_DEFAULT(0), CvMat* dr3dt1 CV_DEFAULT(0), - CvMat* dr3dr2 CV_DEFAULT(0), CvMat* dr3dt2 CV_DEFAULT(0), - CvMat* dt3dr1 CV_DEFAULT(0), CvMat* dt3dt1 CV_DEFAULT(0), - CvMat* dt3dr2 CV_DEFAULT(0), CvMat* dt3dt2 CV_DEFAULT(0) ); - -/* Projects object points to the view plane using - the specified extrinsic and intrinsic camera parameters */ -CVAPI(void) cvProjectPoints2( const CvMat* object_points, const CvMat* rotation_vector, - const CvMat* translation_vector, const CvMat* camera_matrix, - const CvMat* distortion_coeffs, CvMat* image_points, - CvMat* dpdrot CV_DEFAULT(NULL), CvMat* dpdt CV_DEFAULT(NULL), - CvMat* dpdf CV_DEFAULT(NULL), CvMat* dpdc CV_DEFAULT(NULL), - CvMat* dpddist CV_DEFAULT(NULL), - double aspect_ratio CV_DEFAULT(0)); - -/* Finds extrinsic camera parameters from - a few known corresponding point pairs and intrinsic parameters */ -CVAPI(void) cvFindExtrinsicCameraParams2( const CvMat* object_points, - const CvMat* image_points, - const CvMat* camera_matrix, - const CvMat* distortion_coeffs, - CvMat* rotation_vector, - CvMat* translation_vector, - int use_extrinsic_guess CV_DEFAULT(0) ); - -/* Computes initial estimate of the intrinsic camera parameters - in case of planar calibration target (e.g. chessboard) */ -CVAPI(void) cvInitIntrinsicParams2D( const CvMat* object_points, - const CvMat* image_points, - const CvMat* npoints, CvSize image_size, - CvMat* camera_matrix, - double aspect_ratio CV_DEFAULT(1.) ); - #define CV_CALIB_CB_ADAPTIVE_THRESH 1 #define CV_CALIB_CB_NORMALIZE_IMAGE 2 #define CV_CALIB_CB_FILTER_QUADS 4 #define CV_CALIB_CB_FAST_CHECK 8 -// Performs a fast check if a chessboard is in the input image. This is a workaround to -// a problem of cvFindChessboardCorners being slow on images with no chessboard -// - src: input image -// - size: chessboard size -// Returns 1 if a chessboard can be in this image and findChessboardCorners should be called, -// 0 if there is no chessboard, -1 in case of error -CVAPI(int) cvCheckChessboard(IplImage* src, CvSize size); - - /* Detects corners on a chessboard calibration pattern */ -CVAPI(int) cvFindChessboardCorners( const void* image, CvSize pattern_size, - CvPoint2D32f* corners, - int* corner_count CV_DEFAULT(NULL), - int flags CV_DEFAULT(CV_CALIB_CB_ADAPTIVE_THRESH+CV_CALIB_CB_NORMALIZE_IMAGE) ); - -/* Draws individual chessboard corners or the whole chessboard detected */ -CVAPI(void) cvDrawChessboardCorners( CvArr* image, CvSize pattern_size, - CvPoint2D32f* corners, - int count, int pattern_was_found ); - #define CV_CALIB_USE_INTRINSIC_GUESS 1 #define CV_CALIB_FIX_ASPECT_RATIO 2 #define CV_CALIB_FIX_PRINCIPAL_POINT 4 @@ -245,140 +91,19 @@ CVAPI(void) cvDrawChessboardCorners( CvArr* image, CvSize pattern_size, #define CV_CALIB_FIX_S1_S2_S3_S4 65536 #define CV_CALIB_TILTED_MODEL 262144 #define CV_CALIB_FIX_TAUX_TAUY 524288 +#define CV_CALIB_FIX_TANGENT_DIST 2097152 - -/* Finds intrinsic and extrinsic camera parameters - from a few views of known calibration pattern */ -CVAPI(double) cvCalibrateCamera2( const CvMat* object_points, - const CvMat* image_points, - const CvMat* point_counts, - CvSize image_size, - CvMat* camera_matrix, - CvMat* distortion_coeffs, - CvMat* rotation_vectors CV_DEFAULT(NULL), - CvMat* translation_vectors CV_DEFAULT(NULL), - int flags CV_DEFAULT(0), - CvTermCriteria term_crit CV_DEFAULT(cvTermCriteria( - CV_TERMCRIT_ITER+CV_TERMCRIT_EPS,30,DBL_EPSILON)) ); - -/* Computes various useful characteristics of the camera from the data computed by - cvCalibrateCamera2 */ -CVAPI(void) cvCalibrationMatrixValues( const CvMat *camera_matrix, - CvSize image_size, - double aperture_width CV_DEFAULT(0), - double aperture_height CV_DEFAULT(0), - double *fovx CV_DEFAULT(NULL), - double *fovy CV_DEFAULT(NULL), - double *focal_length CV_DEFAULT(NULL), - CvPoint2D64f *principal_point CV_DEFAULT(NULL), - double *pixel_aspect_ratio CV_DEFAULT(NULL)); +#define CV_CALIB_NINTRINSIC 18 #define CV_CALIB_FIX_INTRINSIC 256 #define CV_CALIB_SAME_FOCAL_LENGTH 512 -/* Computes the transformation from one camera coordinate system to another one - from a few correspondent views of the same calibration target. Optionally, calibrates - both cameras */ -CVAPI(double) cvStereoCalibrate( const CvMat* object_points, const CvMat* image_points1, - const CvMat* image_points2, const CvMat* npoints, - CvMat* camera_matrix1, CvMat* dist_coeffs1, - CvMat* camera_matrix2, CvMat* dist_coeffs2, - CvSize image_size, CvMat* R, CvMat* T, - CvMat* E CV_DEFAULT(0), CvMat* F CV_DEFAULT(0), - int flags CV_DEFAULT(CV_CALIB_FIX_INTRINSIC), - CvTermCriteria term_crit CV_DEFAULT(cvTermCriteria( - CV_TERMCRIT_ITER+CV_TERMCRIT_EPS,30,1e-6)) ); - #define CV_CALIB_ZERO_DISPARITY 1024 -/* Computes 3D rotations (+ optional shift) for each camera coordinate system to make both - views parallel (=> to make all the epipolar lines horizontal or vertical) */ -CVAPI(void) cvStereoRectify( const CvMat* camera_matrix1, const CvMat* camera_matrix2, - const CvMat* dist_coeffs1, const CvMat* dist_coeffs2, - CvSize image_size, const CvMat* R, const CvMat* T, - CvMat* R1, CvMat* R2, CvMat* P1, CvMat* P2, - CvMat* Q CV_DEFAULT(0), - int flags CV_DEFAULT(CV_CALIB_ZERO_DISPARITY), - double alpha CV_DEFAULT(-1), - CvSize new_image_size CV_DEFAULT(cvSize(0,0)), - CvRect* valid_pix_ROI1 CV_DEFAULT(0), - CvRect* valid_pix_ROI2 CV_DEFAULT(0)); - -/* Computes rectification transformations for uncalibrated pair of images using a set - of point correspondences */ -CVAPI(int) cvStereoRectifyUncalibrated( const CvMat* points1, const CvMat* points2, - const CvMat* F, CvSize img_size, - CvMat* H1, CvMat* H2, - double threshold CV_DEFAULT(5)); - - - /* stereo correspondence parameters and functions */ - #define CV_STEREO_BM_NORMALIZED_RESPONSE 0 #define CV_STEREO_BM_XSOBEL 1 -/* Block matching algorithm structure */ -typedef struct CvStereoBMState -{ - // pre-filtering (normalization of input images) - int preFilterType; // =CV_STEREO_BM_NORMALIZED_RESPONSE now - int preFilterSize; // averaging window size: ~5x5..21x21 - int preFilterCap; // the output of pre-filtering is clipped by [-preFilterCap,preFilterCap] - - // correspondence using Sum of Absolute Difference (SAD) - int SADWindowSize; // ~5x5..21x21 - int minDisparity; // minimum disparity (can be negative) - int numberOfDisparities; // maximum disparity - minimum disparity (> 0) - - // post-filtering - int textureThreshold; // the disparity is only computed for pixels - // with textured enough neighborhood - int uniquenessRatio; // accept the computed disparity d* only if - // SAD(d) >= SAD(d*)*(1 + uniquenessRatio/100.) - // for any d != d*+/-1 within the search range. - int speckleWindowSize; // disparity variation window - int speckleRange; // acceptable range of variation in window - - int trySmallerWindows; // if 1, the results may be more accurate, - // at the expense of slower processing - CvRect roi1, roi2; - int disp12MaxDiff; - - // temporary buffers - CvMat* preFilteredImg0; - CvMat* preFilteredImg1; - CvMat* slidingSumBuf; - CvMat* cost; - CvMat* disp; -} CvStereoBMState; - -#define CV_STEREO_BM_BASIC 0 -#define CV_STEREO_BM_FISH_EYE 1 -#define CV_STEREO_BM_NARROW 2 - -CVAPI(CvStereoBMState*) cvCreateStereoBMState(int preset CV_DEFAULT(CV_STEREO_BM_BASIC), - int numberOfDisparities CV_DEFAULT(0)); - -CVAPI(void) cvReleaseStereoBMState( CvStereoBMState** state ); - -CVAPI(void) cvFindStereoCorrespondenceBM( const CvArr* left, const CvArr* right, - CvArr* disparity, CvStereoBMState* state ); - -CVAPI(CvRect) cvGetValidDisparityROI( CvRect roi1, CvRect roi2, int minDisparity, - int numberOfDisparities, int SADWindowSize ); - -CVAPI(void) cvValidateDisparity( CvArr* disparity, const CvArr* cost, - int minDisparity, int numberOfDisparities, - int disp12MaxDiff CV_DEFAULT(1) ); - -/* Reprojects the computed disparity image to the 3D space using the specified 4x4 matrix */ -CVAPI(void) cvReprojectImageTo3D( const CvArr* disparityImage, - CvArr* _3dImage, const CvMat* Q, - int handleMissingValues CV_DEFAULT(0) ); - -/** @} calib3d_c */ - #ifdef __cplusplus } // extern "C" @@ -388,11 +113,11 @@ class CV_EXPORTS CvLevMarq public: CvLevMarq(); CvLevMarq( int nparams, int nerrs, CvTermCriteria criteria= - cvTermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER,30,DBL_EPSILON), + cvTermCriteria(cv::TermCriteria::EPS+cv::TermCriteria::MAX_ITER,30,DBL_EPSILON), bool completeSymmFlag=false ); ~CvLevMarq(); void init( int nparams, int nerrs, CvTermCriteria criteria= - cvTermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER,30,DBL_EPSILON), + cvTermCriteria(cv::TermCriteria::EPS+cv::TermCriteria::MAX_ITER,30,DBL_EPSILON), bool completeSymmFlag=false ); bool update( const CvMat*& param, CvMat*& J, CvMat*& err ); bool updateAlt( const CvMat*& param, CvMat*& JtJ, CvMat*& JtErr, double*& errNorm ); @@ -422,4 +147,4 @@ class CV_EXPORTS CvLevMarq #endif -#endif /* __OPENCV_CALIB3D_C_H__ */ +#endif /* OPENCV_CALIB3D_C_H */ diff --git a/IPL/include/opencv/opencv2/ccalib.hpp b/IPL/include/opencv/opencv2/ccalib.hpp index 79df598..538ec0f 100644 --- a/IPL/include/opencv/opencv2/ccalib.hpp +++ b/IPL/include/opencv/opencv2/ccalib.hpp @@ -71,7 +71,7 @@ class CV_EXPORTS CustomPattern : public Algorithm bool isInitialized(); - void getPatternPoints(OutputArray original_points); + void getPatternPoints(std::vector& original_points); /**< Returns a vector of the original points. */ @@ -96,21 +96,21 @@ class CV_EXPORTS CustomPattern : public Algorithm Calls the calirateCamera function with the same inputs. */ - bool findRt(InputArray objectPoints, InputArray imagePoints, InputArray cameraMatrix, InputArray distCoeffs, - OutputArray rvec, OutputArray tvec, bool useExtrinsicGuess = false, int flags = SOLVEPNP_ITERATIVE); - bool findRt(InputArray image, InputArray cameraMatrix, InputArray distCoeffs, - OutputArray rvec, OutputArray tvec, bool useExtrinsicGuess = false, int flags = SOLVEPNP_ITERATIVE); + bool findRt(InputArray objectPoints, InputArray imagePoints, InputArray cameraMatrix, InputArray distCoeffs, + InputOutputArray rvec, InputOutputArray tvec, bool useExtrinsicGuess = false, int flags = SOLVEPNP_ITERATIVE); + bool findRt(InputArray image, InputArray cameraMatrix, InputArray distCoeffs, + InputOutputArray rvec, InputOutputArray tvec, bool useExtrinsicGuess = false, int flags = SOLVEPNP_ITERATIVE); /**< Uses solvePnP to find the rotation and translation of the pattern with respect to the camera frame. */ - bool findRtRANSAC(InputArray objectPoints, InputArray imagePoints, InputArray cameraMatrix, InputArray distCoeffs, - OutputArray rvec, OutputArray tvec, bool useExtrinsicGuess = false, int iterationsCount = 100, - float reprojectionError = 8.0, int minInliersCount = 100, OutputArray inliers = noArray(), int flags = SOLVEPNP_ITERATIVE); - bool findRtRANSAC(InputArray image, InputArray cameraMatrix, InputArray distCoeffs, - OutputArray rvec, OutputArray tvec, bool useExtrinsicGuess = false, int iterationsCount = 100, - float reprojectionError = 8.0, int minInliersCount = 100, OutputArray inliers = noArray(), int flags = SOLVEPNP_ITERATIVE); + bool findRtRANSAC(InputArray objectPoints, InputArray imagePoints, InputArray cameraMatrix, InputArray distCoeffs, + InputOutputArray rvec, InputOutputArray tvec, bool useExtrinsicGuess = false, int iterationsCount = 100, + float reprojectionError = 8.0, int minInliersCount = 100, OutputArray inliers = noArray(), int flags = SOLVEPNP_ITERATIVE); + bool findRtRANSAC(InputArray image, InputArray cameraMatrix, InputArray distCoeffs, + InputOutputArray rvec, InputOutputArray tvec, bool useExtrinsicGuess = false, int iterationsCount = 100, + float reprojectionError = 8.0, int minInliersCount = 100, OutputArray inliers = noArray(), int flags = SOLVEPNP_ITERATIVE); /**< Uses solvePnPRansac() */ diff --git a/IPL/include/opencv/opencv2/ccalib/omnidir.hpp b/IPL/include/opencv/opencv2/ccalib/omnidir.hpp index 25c41bf..d3132b3 100644 --- a/IPL/include/opencv/opencv2/ccalib/omnidir.hpp +++ b/IPL/include/opencv/opencv2/ccalib/omnidir.hpp @@ -39,12 +39,13 @@ // //M*/ -#include -#include - #ifndef __OPENCV_OMNIDIR_HPP__ #define __OPENCV_OMNIDIR_HPP__ +#include "opencv2/core.hpp" +#include "opencv2/core/affine.hpp" +#include + namespace cv { namespace omnidir @@ -102,6 +103,10 @@ namespace omnidir CV_EXPORTS_W void projectPoints(InputArray objectPoints, OutputArray imagePoints, InputArray rvec, InputArray tvec, InputArray K, double xi, InputArray D, OutputArray jacobian = noArray()); + /** @overload */ + CV_EXPORTS void projectPoints(InputArray objectPoints, OutputArray imagePoints, const Affine3d& affine, + InputArray K, double xi, InputArray D, OutputArray jacobian = noArray()); + /** @brief Undistort 2D image points for omnidirectional camera using CMei's model @param distorted Array of distorted image points, vector of Vec2f @@ -126,7 +131,7 @@ namespace omnidir @param R Rotation transform between the original and object space : 3x3 1-channel, or vector: 3x1/1x3, with depth CV_32F or CV_64F @param P New camera matrix (3x3) or new projection matrix (3x4) @param size Undistorted image size. - @param mltype Type of the first output map that can be CV_32FC1 or CV_16SC2 . See convertMaps() + @param m1type Type of the first output map that can be CV_32FC1 or CV_16SC2 . See convertMaps() for details. @param map1 The first output map. @param map2 The second output map. @@ -134,7 +139,7 @@ namespace omnidir are supported. */ CV_EXPORTS_W void initUndistortRectifyMap(InputArray K, InputArray D, InputArray xi, InputArray R, InputArray P, const cv::Size& size, - int mltype, OutputArray map1, OutputArray map2, int flags); + int m1type, OutputArray map1, OutputArray map2, int flags); /** @brief Undistort omnidirectional images to perspective images @@ -168,7 +173,7 @@ namespace omnidir @param idx Indices of images that pass initialization, which are really used in calibration. So the size of rvecs is the same as idx.total(). */ - CV_EXPORTS_W double calibrate(InputArray objectPoints, InputArray imagePoints, Size size, + CV_EXPORTS_W double calibrate(InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints, Size size, InputOutputArray K, InputOutputArray xi, InputOutputArray D, OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs, int flags, TermCriteria criteria, OutputArray idx=noArray()); @@ -278,8 +283,6 @@ namespace internal double computeMeanReproErrStereo(InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints1, InputArrayOfArrays imagePoints2, InputArray K1, InputArray K2, InputArray D1, InputArray D2, double xi1, double xi2, InputArray om, InputArray T, InputArrayOfArrays omL, InputArrayOfArrays TL); - void checkFixed(Mat &G, int flags, int n); - void subMatrix(const Mat& src, Mat& dst, const std::vector& cols, const std::vector& rows); void flags2idx(int flags, std::vector& idx, int n); @@ -309,4 +312,4 @@ namespace internal } // omnidir } //cv -#endif \ No newline at end of file +#endif diff --git a/IPL/include/opencv/opencv2/ccalib/randpattern.hpp b/IPL/include/opencv/opencv2/ccalib/randpattern.hpp index 9fc08f8..fb362bd 100644 --- a/IPL/include/opencv/opencv2/ccalib/randpattern.hpp +++ b/IPL/include/opencv/opencv2/ccalib/randpattern.hpp @@ -86,7 +86,14 @@ class CV_EXPORTS RandomPatternCornerFinder /* @brief Load pattern image and compute features for pattern @param patternImage image for "random" pattern generated by RandomPatternGenerator, run it first. */ - void loadPattern(cv::Mat patternImage); + void loadPattern(const cv::Mat &patternImage); + + /* @brief Load pattern and features + @param patternImage image for "random" pattern generated by RandomPatternGenerator, run it first. + @param patternKeyPoints keyPoints created from a FeatureDetector. + @param patternDescriptors descriptors created from a DescriptorExtractor. + */ + void loadPattern(const cv::Mat &patternImage, const std::vector &patternKeyPoints, const cv::Mat &patternDescriptors); /* @brief Compute matched object points and image points which are used for calibration The objectPoints (3D) and imagePoints (2D) are stored inside the class. Run getObjectPoints() @@ -108,11 +115,11 @@ class CV_EXPORTS RandomPatternCornerFinder /* @brief Get object(3D) points */ - std::vector getObjectPoints(); + const std::vector &getObjectPoints(); /* @brief and image(2D) points */ - std::vector getImagePoints(); + const std::vector &getImagePoints(); private: diff --git a/IPL/include/opencv/opencv2/core.hpp b/IPL/include/opencv/opencv2/core.hpp index 2e47658..ff9fa36 100644 --- a/IPL/include/opencv/opencv2/core.hpp +++ b/IPL/include/opencv/opencv2/core.hpp @@ -42,8 +42,8 @@ // //M*/ -#ifndef __OPENCV_CORE_HPP__ -#define __OPENCV_CORE_HPP__ +#ifndef OPENCV_CORE_HPP +#define OPENCV_CORE_HPP #ifndef __cplusplus # error core.hpp header must be compiled as C++ @@ -68,12 +68,17 @@ @defgroup core_c_glue Connections with C++ @} @defgroup core_array Operations on arrays + @defgroup core_async Asynchronous API @defgroup core_xml XML/YAML Persistence @defgroup core_cluster Clustering @defgroup core_utils Utility and system functions and macros @{ + @defgroup core_logging Logging facilities @defgroup core_utils_sse SSE utilities @defgroup core_utils_neon NEON utilities + @defgroup core_utils_vsx VSX utilities + @defgroup core_utils_softfloat Softfloat support + @defgroup core_utils_samples Utility functions for OpenCV samples @} @defgroup core_opengl OpenGL interoperability @defgroup core_ipp Intel IPP Asynchronous C/C++ Converters @@ -90,6 +95,7 @@ @{ @defgroup core_hal_intrin_impl Private implementation helpers @} + @defgroup core_lowlevel_api Low-level API for external libraries / plugins @} @} */ @@ -114,7 +120,7 @@ class CV_EXPORTS Exception : public std::exception */ Exception(); /*! - Full constructor. Normally the constuctor is not called explicitly. + Full constructor. Normally the constructor is not called explicitly. Instead, the macros CV_Error(), CV_Error_() and CV_Assert() are used. */ Exception(int _code, const String& _err, const String& _func, const String& _file, int _line); @@ -123,7 +129,7 @@ class CV_EXPORTS Exception : public std::exception /*! \return the error description and the context as a text string. */ - virtual const char *what() const throw(); + virtual const char *what() const throw() CV_OVERRIDE; void formatMessage(); String msg; ///< the formatted error message @@ -131,19 +137,19 @@ class CV_EXPORTS Exception : public std::exception int code; ///< error code @see CVStatus String err; ///< error description String func; ///< function name. Available only when the compiler supports getting it - String file; ///< source file name where the error has occured - int line; ///< line number in the source file where the error has occured + String file; ///< source file name where the error has occurred + int line; ///< line number in the source file where the error has occurred }; /*! @brief Signals an error and raises the exception. By default the function prints information about the error to stderr, then it either stops if cv::setBreakOnError() had been called before or raises the exception. -It is possible to alternate error processing by using cv::redirectError(). +It is possible to alternate error processing by using #redirectError(). @param exc the exception raisen. @deprecated drop this version */ -CV_EXPORTS void error( const Exception& exc ); +CV_EXPORTS CV_NORETURN void error(const Exception& exc); enum SortFlags { SORT_EVERY_ROW = 0, //!< each matrix row is sorted independently SORT_EVERY_COLUMN = 1, //!< each matrix column is sorted @@ -174,7 +180,7 @@ enum CovarFlags { /**The output covariance matrix is calculated as: \f[\texttt{scale} \cdot [ \texttt{vects} [0]- \texttt{mean} , \texttt{vects} [1]- \texttt{mean} ,...] \cdot [ \texttt{vects} [0]- \texttt{mean} , \texttt{vects} [1]- \texttt{mean} ,...]^T,\f] covar will be a square matrix of the same size as the total number of elements in each input - vector. One and only one of COVAR_SCRAMBLED and COVAR_NORMAL must be specified.*/ + vector. One and only one of #COVAR_SCRAMBLED and #COVAR_NORMAL must be specified.*/ COVAR_NORMAL = 1, /** If the flag is specified, the function does not calculate mean from the input vectors but, instead, uses the passed mean vector. This is useful if mean has been @@ -210,28 +216,6 @@ enum KmeansFlags { KMEANS_USE_INITIAL_LABELS = 1 }; -//! type of line -enum LineTypes { - FILLED = -1, - LINE_4 = 4, //!< 4-connected line - LINE_8 = 8, //!< 8-connected line - LINE_AA = 16 //!< antialiased line -}; - -//! Only a subset of Hershey fonts -//! are supported -enum HersheyFonts { - FONT_HERSHEY_SIMPLEX = 0, //!< normal size sans-serif font - FONT_HERSHEY_PLAIN = 1, //!< small size sans-serif font - FONT_HERSHEY_DUPLEX = 2, //!< normal size sans-serif font (more complex than FONT_HERSHEY_SIMPLEX) - FONT_HERSHEY_COMPLEX = 3, //!< normal size serif font - FONT_HERSHEY_TRIPLEX = 4, //!< normal size serif font (more complex than FONT_HERSHEY_COMPLEX) - FONT_HERSHEY_COMPLEX_SMALL = 5, //!< smaller version of FONT_HERSHEY_COMPLEX - FONT_HERSHEY_SCRIPT_SIMPLEX = 6, //!< hand-writing style font - FONT_HERSHEY_SCRIPT_COMPLEX = 7, //!< more complex variant of FONT_HERSHEY_SCRIPT_SIMPLEX - FONT_ITALIC = 16 //!< flag for italic font -}; - enum ReduceTypes { REDUCE_SUM = 0, //!< the output is the sum of all rows/columns of the matrix. REDUCE_AVG = 1, //!< the output is the mean vector of all rows/columns of the matrix. REDUCE_MAX = 2, //!< the output is the maximum (column/row-wise) of all rows/columns of the matrix. @@ -265,14 +249,19 @@ Normally, the function is not called directly. It is used inside filtering funct copyMakeBorder. @param p 0-based coordinate of the extrapolated pixel along one of the axes, likely \<0 or \>= len @param len Length of the array along the corresponding axis. -@param borderType Border type, one of the cv::BorderTypes, except for cv::BORDER_TRANSPARENT and -cv::BORDER_ISOLATED . When borderType==cv::BORDER_CONSTANT , the function always returns -1, regardless +@param borderType Border type, one of the #BorderTypes, except for #BORDER_TRANSPARENT and +#BORDER_ISOLATED . When borderType==#BORDER_CONSTANT , the function always returns -1, regardless of p and len. @sa copyMakeBorder */ CV_EXPORTS_W int borderInterpolate(int p, int len, int borderType); +/** @example samples/cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp +An example using copyMakeBorder function. +Check @ref tutorial_copyMakeBorder "the corresponding tutorial" for more details +*/ + /** @brief Forms a border around an image. The function copies the source image into the middle of the destination image. The areas to the @@ -300,14 +289,14 @@ function does not copy src itself but simply constructs the border, for example: @endcode @note When the source image is a part (ROI) of a bigger image, the function will try to use the pixels outside of the ROI to form a border. To disable this feature and always do extrapolation, as -if src was not a ROI, use borderType | BORDER_ISOLATED. +if src was not a ROI, use borderType | #BORDER_ISOLATED. @param src Source image. @param dst Destination image of the same type as src and the size Size(src.cols+left+right, src.rows+top+bottom) . -@param top -@param bottom -@param left +@param top the top pixels +@param bottom the bottom pixels +@param left the left pixels @param right Parameter specifying how many pixels in each direction from the source image rectangle to extrapolate. For example, top=1, bottom=1, left=1, right=1 mean that 1 pixel-wide border needs to be built. @@ -426,13 +415,18 @@ CV_EXPORTS_W void multiply(InputArray src1, InputArray src2, /** @brief Performs per-element division of two arrays or a scalar by an array. -The functions divide divide one array by another: +The function cv::divide divides one array by another: \f[\texttt{dst(I) = saturate(src1(I)*scale/src2(I))}\f] or a scalar by an array when there is no src1 : \f[\texttt{dst(I) = saturate(scale/src2(I))}\f] -When src2(I) is zero, dst(I) will also be zero. Different channels of -multi-channel arrays are processed independently. +Different channels of multi-channel arrays are processed independently. + +For integer types when src2(I) is zero, dst(I) will also be zero. + +@note In case of floating point data there is no special defined behavior for zero src2(I) values. +Regular floating-point division is used. +Expect correct IEEE-754 behaviour for floating-point data (with NaN, Inf result values). @note Saturation is not applied when the output array has the depth CV_32S. You may even get result of an incorrect sign in the case of overflow. @@ -471,6 +465,10 @@ The function can also be emulated with a matrix expression, for example: */ CV_EXPORTS_W void scaleAdd(InputArray src1, double alpha, InputArray src2, OutputArray dst); +/** @example samples/cpp/tutorial_code/HighGUI/AddingImagesTrackbar.cpp +Check @ref tutorial_trackbar "the corresponding tutorial" for more details +*/ + /** @brief Calculates the weighted sum of two arrays. The function addWeighted calculates the weighted sum of two arrays as follows: @@ -524,6 +522,18 @@ For example: CV_EXPORTS_W void convertScaleAbs(InputArray src, OutputArray dst, double alpha = 1, double beta = 0); +/** @brief Converts an array to half precision floating number. + +This function converts FP32 (single precision floating point) from/to FP16 (half precision floating point). CV_16S format is used to represent FP16 data. +There are two use modes (src -> dst): CV_32F -> CV_16S and CV_16S -> CV_32F. The input array has to have type of CV_32F or +CV_16S to represent the bit depth. If the input array is neither of them, the function will raise an error. +The format of half precision floating point is defined in IEEE 754-2008. + +@param src input array. +@param dst output array. +*/ +CV_EXPORTS_W void convertFp16(InputArray src, OutputArray dst); + /** @brief Performs a look-up table transform of an array. The function LUT fills the output array with values from the look-up table. Indices of the entries @@ -542,7 +552,7 @@ CV_EXPORTS_W void LUT(InputArray src, InputArray lut, OutputArray dst); /** @brief Calculates the sum of array elements. -The functions sum calculate and return the sum of array elements, +The function cv::sum calculates and returns the sum of array elements, independently for each channel. @param src input array that must have from 1 to 4 channels. @sa countNonZero, mean, meanStdDev, norm, minMaxLoc, reduce @@ -581,17 +591,17 @@ or // access pixel coordinates Point pnt = locations[i]; @endcode -@param src single-channel array (type CV_8UC1) +@param src single-channel array @param idx the output array, type of cv::Mat or std::vector, corresponding to non-zero indices in the input */ CV_EXPORTS_W void findNonZero( InputArray src, OutputArray idx ); /** @brief Calculates an average (mean) of array elements. -The function mean calculates the mean value M of array elements, +The function cv::mean calculates the mean value M of array elements, independently for each channel, and return it: \f[\begin{array}{l} N = \sum _{I: \; \texttt{mask} (I) \ne 0} 1 \\ M_c = \left ( \sum _{I: \; \texttt{mask} (I) \ne 0}{ \texttt{mtx} (I)_c} \right )/N \end{array}\f] -When all the mask elements are 0's, the functions return Scalar::all(0) +When all the mask elements are 0's, the function returns Scalar::all(0) @param src input array that should have from 1 to 4 channels so that the result can be stored in Scalar_ . @param mask optional operation mask. @@ -601,11 +611,11 @@ CV_EXPORTS_W Scalar mean(InputArray src, InputArray mask = noArray()); /** Calculates a mean and standard deviation of array elements. -The function meanStdDev calculates the mean and the standard deviation M +The function cv::meanStdDev calculates the mean and the standard deviation M of array elements independently for each channel and returns it via the output parameters: \f[\begin{array}{l} N = \sum _{I, \texttt{mask} (I) \ne 0} 1 \\ \texttt{mean} _c = \frac{\sum_{ I: \; \texttt{mask}(I) \ne 0} \texttt{src} (I)_c}{N} \\ \texttt{stddev} _c = \sqrt{\frac{\sum_{ I: \; \texttt{mask}(I) \ne 0} \left ( \texttt{src} (I)_c - \texttt{mean} _c \right )^2}{N}} \end{array}\f] -When all the mask elements are 0's, the functions return +When all the mask elements are 0's, the function returns mean=stddev=Scalar::all(0). @note The calculated standard deviation is only the diagonal of the complete normalized covariance matrix. If the full matrix is needed, you @@ -615,69 +625,90 @@ then pass the matrix to calcCovarMatrix . @param src input array that should have from 1 to 4 channels so that the results can be stored in Scalar_ 's. @param mean output parameter: calculated mean value. -@param stddev output parameter: calculateded standard deviation. +@param stddev output parameter: calculated standard deviation. @param mask optional operation mask. @sa countNonZero, mean, norm, minMaxLoc, calcCovarMatrix */ CV_EXPORTS_W void meanStdDev(InputArray src, OutputArray mean, OutputArray stddev, InputArray mask=noArray()); -/** @brief Calculates an absolute array norm, an absolute difference norm, or a -relative difference norm. - -The functions norm calculate an absolute norm of src1 (when there is no -src2 ): - -\f[norm = \forkthree{\|\texttt{src1}\|_{L_{\infty}} = \max _I | \texttt{src1} (I)|}{if \(\texttt{normType} = \texttt{NORM_INF}\) } -{ \| \texttt{src1} \| _{L_1} = \sum _I | \texttt{src1} (I)|}{if \(\texttt{normType} = \texttt{NORM_L1}\) } -{ \| \texttt{src1} \| _{L_2} = \sqrt{\sum_I \texttt{src1}(I)^2} }{if \(\texttt{normType} = \texttt{NORM_L2}\) }\f] - -or an absolute or relative difference norm if src2 is there: - -\f[norm = \forkthree{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} = \max _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if \(\texttt{normType} = \texttt{NORM_INF}\) } -{ \| \texttt{src1} - \texttt{src2} \| _{L_1} = \sum _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if \(\texttt{normType} = \texttt{NORM_L1}\) } -{ \| \texttt{src1} - \texttt{src2} \| _{L_2} = \sqrt{\sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2} }{if \(\texttt{normType} = \texttt{NORM_L2}\) }\f] - -or - -\f[norm = \forkthree{\frac{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} }{\|\texttt{src2}\|_{L_{\infty}} }}{if \(\texttt{normType} = \texttt{NORM_RELATIVE_INF}\) } -{ \frac{\|\texttt{src1}-\texttt{src2}\|_{L_1} }{\|\texttt{src2}\|_{L_1}} }{if \(\texttt{normType} = \texttt{NORM_RELATIVE_L1}\) } -{ \frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}} }{if \(\texttt{normType} = \texttt{NORM_RELATIVE_L2}\) }\f] - -The functions norm return the calculated norm. +/** @brief Calculates the absolute norm of an array. + +This version of #norm calculates the absolute norm of src1. The type of norm to calculate is specified using #NormTypes. + +As example for one array consider the function \f$r(x)= \begin{pmatrix} x \\ 1-x \end{pmatrix}, x \in [-1;1]\f$. +The \f$ L_{1}, L_{2} \f$ and \f$ L_{\infty} \f$ norm for the sample value \f$r(-1) = \begin{pmatrix} -1 \\ 2 \end{pmatrix}\f$ +is calculated as follows +\f{align*} + \| r(-1) \|_{L_1} &= |-1| + |2| = 3 \\ + \| r(-1) \|_{L_2} &= \sqrt{(-1)^{2} + (2)^{2}} = \sqrt{5} \\ + \| r(-1) \|_{L_\infty} &= \max(|-1|,|2|) = 2 +\f} +and for \f$r(0.5) = \begin{pmatrix} 0.5 \\ 0.5 \end{pmatrix}\f$ the calculation is +\f{align*} + \| r(0.5) \|_{L_1} &= |0.5| + |0.5| = 1 \\ + \| r(0.5) \|_{L_2} &= \sqrt{(0.5)^{2} + (0.5)^{2}} = \sqrt{0.5} \\ + \| r(0.5) \|_{L_\infty} &= \max(|0.5|,|0.5|) = 0.5. +\f} +The following graphic shows all values for the three norm functions \f$\| r(x) \|_{L_1}, \| r(x) \|_{L_2}\f$ and \f$\| r(x) \|_{L_\infty}\f$. +It is notable that the \f$ L_{1} \f$ norm forms the upper and the \f$ L_{\infty} \f$ norm forms the lower border for the example function \f$ r(x) \f$. +![Graphs for the different norm functions from the above example](pics/NormTypes_OneArray_1-2-INF.png) When the mask parameter is specified and it is not empty, the norm is + +If normType is not specified, #NORM_L2 is used. calculated only over the region specified by the mask. -A multi-channel input arrays are treated as a single-channel, that is, +Multi-channel input arrays are treated as single-channel arrays, that is, the results for all channels are combined. +Hamming norms can only be calculated with CV_8U depth arrays. + @param src1 first input array. -@param normType type of the norm (see cv::NormTypes). +@param normType type of the norm (see #NormTypes). @param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type. */ CV_EXPORTS_W double norm(InputArray src1, int normType = NORM_L2, InputArray mask = noArray()); -/** @overload +/** @brief Calculates an absolute difference norm or a relative difference norm. + +This version of cv::norm calculates the absolute difference norm +or the relative difference norm of arrays src1 and src2. +The type of norm to calculate is specified using #NormTypes. + @param src1 first input array. @param src2 second input array of the same size and the same type as src1. -@param normType type of the norm (cv::NormTypes). +@param normType type of the norm (see #NormTypes). @param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type. */ CV_EXPORTS_W double norm(InputArray src1, InputArray src2, int normType = NORM_L2, InputArray mask = noArray()); /** @overload @param src first input array. -@param normType type of the norm (see cv::NormTypes). +@param normType type of the norm (see #NormTypes). */ CV_EXPORTS double norm( const SparseMat& src, int normType ); -/** @brief computes PSNR image/video quality metric +/** @brief Computes the Peak Signal-to-Noise Ratio (PSNR) image quality metric. + +This function calculates the Peak Signal-to-Noise Ratio (PSNR) image quality metric in decibels (dB), +between two input arrays src1 and src2. The arrays must have the same type. + +The PSNR is calculated as follows: + +\f[ +\texttt{PSNR} = 10 \cdot \log_{10}{\left( \frac{R^2}{MSE} \right) } +\f] + +where R is the maximum integer value of depth (e.g. 255 in the case of CV_8U data) +and MSE is the mean squared error between the two arrays. + +@param src1 first input array. +@param src2 second input array of the same size as src1. +@param R the maximum pixel value (255 by default) -see http://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio for details -@todo document */ -CV_EXPORTS_W double PSNR(InputArray src1, InputArray src2); +CV_EXPORTS_W double PSNR(InputArray src1, InputArray src2, double R=255.); /** @brief naive nearest neighbor finder @@ -692,7 +723,7 @@ CV_EXPORTS_W void batchDistance(InputArray src1, InputArray src2, /** @brief Normalizes the norm or value range of an array. -The functions normalize scale and shift the input array elements so that +The function cv::normalize normalizes scale and shift the input array elements so that \f[\| \texttt{dst} \| _{L_p}= \texttt{alpha}\f] (where p=Inf, 1 or 2) when normType=NORM_INF, NORM_L1, or NORM_L2, respectively; or so that \f[\min _I \texttt{dst} (I)= \texttt{alpha} , \, \, \max _I \texttt{dst} (I)= \texttt{beta}\f] @@ -762,11 +793,11 @@ CV_EXPORTS void normalize( const SparseMat& src, SparseMat& dst, double alpha, i /** @brief Finds the global minimum and maximum in an array. -The functions minMaxLoc find the minimum and maximum element values and their positions. The +The function cv::minMaxLoc finds the minimum and maximum element values and their positions. The extremums are searched across the whole array or, if mask is not an empty array, in the specified array region. -The functions do not work with multi-channel arrays. If you need to find minimum or maximum +The function do not work with multi-channel arrays. If you need to find minimum or maximum elements across all the channels, use Mat::reshape first to reinterpret the array as single-channel. Or you may extract the particular channel using either extractImageCOI , or mixChannels , or split . @@ -785,7 +816,7 @@ CV_EXPORTS_W void minMaxLoc(InputArray src, CV_OUT double* minVal, /** @brief Finds the global minimum and maximum in an array -The function minMaxIdx finds the minimum and maximum element values and their positions. The +The function cv::minMaxIdx finds the minimum and maximum element values and their positions. The extremums are searched across the whole array or, if mask is not an empty array, in the specified array region. The function does not work with multi-channel arrays. If you need to find minimum or maximum elements across all the channels, use Mat::reshape first to reinterpret the array as @@ -823,17 +854,24 @@ CV_EXPORTS void minMaxLoc(const SparseMat& a, double* minVal, /** @brief Reduces a matrix to a vector. -The function reduce reduces the matrix to a vector by treating the matrix rows/columns as a set of +The function #reduce reduces the matrix to a vector by treating the matrix rows/columns as a set of 1D vectors and performing the specified operation on the vectors until a single row/column is obtained. For example, the function can be used to compute horizontal and vertical projections of a -raster image. In case of REDUCE_SUM and REDUCE_AVG , the output may have a larger element -bit-depth to preserve accuracy. And multi-channel arrays are also supported in these two reduction -modes. +raster image. In case of #REDUCE_MAX and #REDUCE_MIN , the output image should have the same type as the source one. +In case of #REDUCE_SUM and #REDUCE_AVG , the output may have a larger element bit-depth to preserve accuracy. +And multi-channel arrays are also supported in these two reduction modes. + +The following code demonstrates its usage for a single channel matrix. +@snippet snippets/core_reduce.cpp example + +And the following code demonstrates its usage for a two-channel matrix. +@snippet snippets/core_reduce.cpp example2 + @param src input 2D matrix. @param dst output vector. Its size and type is defined by dim and dtype parameters. @param dim dimension index along which the matrix is reduced. 0 means that the matrix is reduced to a single row. 1 means that the matrix is reduced to a single column. -@param rtype reduction operation that could be one of cv::ReduceTypes +@param rtype reduction operation that could be one of #ReduceTypes @param dtype when negative, the output vector will have the same type as the input matrix, otherwise, its type will be CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()). @sa repeat @@ -842,12 +880,16 @@ CV_EXPORTS_W void reduce(InputArray src, OutputArray dst, int dim, int rtype, in /** @brief Creates one multi-channel array out of several single-channel ones. -The function merge merges several arrays to make a single multi-channel array. That is, each +The function cv::merge merges several arrays to make a single multi-channel array. That is, each element of the output array will be a concatenation of the elements of the input arrays, where elements of i-th input array are treated as mv[i].channels()-element vectors. The function cv::split does the reverse operation. If you need to shuffle channels in some other advanced way, use cv::mixChannels. + +The following example shows how to merge 3 single channel matrices into a single 3-channel matrix. +@snippet snippets/core_merge.cpp example + @param mv input array of matrices to be merged; all the matrices in mv must have the same size and the same depth. @param count number of input matrices when mv is a plain C array; it must be greater than zero. @@ -867,10 +909,14 @@ CV_EXPORTS_W void merge(InputArrayOfArrays mv, OutputArray dst); /** @brief Divides a multi-channel array into several single-channel arrays. -The functions split split a multi-channel array into separate single-channel arrays: +The function cv::split splits a multi-channel array into separate single-channel arrays: \f[\texttt{mv} [c](I) = \texttt{src} (I)_c\f] If you need to extract a single channel or do some other sophisticated channel permutation, use mixChannels . + +The following example demonstrates how to split a 3-channel matrix into 3 single channel matrices. +@snippet snippets/core_split.cpp example + @param src input multi-channel array. @param mvbegin output array; the number of arrays must match src.channels(); the arrays themselves are reallocated, if needed. @@ -889,7 +935,7 @@ output arrays. The function cv::mixChannels provides an advanced mechanism for shuffling image channels. -cv::split and cv::merge and some forms of cv::cvtColor are partial cases of cv::mixChannels . +cv::split,cv::merge,cv::extractChannel,cv::insertChannel and some forms of cv::cvtColor are partial cases of cv::mixChannels. In the example below, the code splits a 4-channel BGRA image into a 3-channel BGR (with B and R channels swapped) and a separate alpha-channel image: @@ -923,7 +969,7 @@ src[0].channels() + src[1].channels()-1, and so on, the same scheme is used for channels; as a special case, when fromTo[k\*2] is negative, the corresponding output channel is filled with zero . @param npairs number of index pairs in `fromTo`. -@sa cv::split, cv::merge, cv::cvtColor +@sa split, merge, extractChannel, insertChannel, cvtColor */ CV_EXPORTS void mixChannels(const Mat* src, size_t nsrcs, Mat* dst, size_t ndsts, const int* fromTo, size_t npairs); @@ -961,19 +1007,25 @@ filled with zero . CV_EXPORTS_W void mixChannels(InputArrayOfArrays src, InputOutputArrayOfArrays dst, const std::vector& fromTo); -/** @brief extracts a single channel from src (coi is 0-based index) -@todo document +/** @brief Extracts a single channel from src (coi is 0-based index) +@param src input array +@param dst output array +@param coi index of channel to extract +@sa mixChannels, split */ CV_EXPORTS_W void extractChannel(InputArray src, OutputArray dst, int coi); -/** @brief inserts a single channel to dst (coi is 0-based index) -@todo document +/** @brief Inserts a single channel to dst (coi is 0-based index) +@param src input array +@param dst output array +@param coi index of channel for insertion +@sa mixChannels, merge */ CV_EXPORTS_W void insertChannel(InputArray src, InputOutputArray dst, int coi); /** @brief Flips a 2D array around vertical, horizontal, or both axes. -The function flip flips the array in one of three different ways (row +The function cv::flip flips the array in one of three different ways (row and column indices are 0-based): \f[\texttt{dst} _{ij} = \left\{ @@ -1005,6 +1057,24 @@ around both axes. */ CV_EXPORTS_W void flip(InputArray src, OutputArray dst, int flipCode); +enum RotateFlags { + ROTATE_90_CLOCKWISE = 0, //! --DBL_MAX and maxVal \< DBL_MAX, the functions also check that each value is between minVal and +The function cv::checkRange checks that every array element is neither NaN nor infinite. When minVal \> +-DBL_MAX and maxVal \< DBL_MAX, the function also checks that each value is between minVal and maxVal. In case of multi-channel arrays, each channel is processed independently. If some values are out of range, position of the first outlier is stored in pos (when pos != NULL). Then, the -functions either return false (when quiet=true) or throw an exception. +function either returns false (when quiet=true) or throws an exception. @param a input array. @param quiet a flag, indicating whether the functions quietly return false when the array elements are out of range or they throw an exception. @@ -1542,7 +1620,7 @@ CV_EXPORTS_W void patchNaNs(InputOutputArray a, double val = 0); /** @brief Performs generalized matrix multiplication. -The function performs generalized matrix multiplication similar to the +The function cv::gemm performs generalized matrix multiplication similar to the gemm functions in BLAS level 3. For example, `gemm(src1, src2, alpha, src3, beta, dst, GEMM_1_T + GEMM_3_T)` corresponds to @@ -1573,7 +1651,7 @@ CV_EXPORTS_W void gemm(InputArray src1, InputArray src2, double alpha, /** @brief Calculates the product of a matrix and its transposition. -The function mulTransposed calculates the product of src and its +The function cv::mulTransposed calculates the product of src and its transposition: \f[\texttt{dst} = \texttt{scale} ( \texttt{src} - \texttt{delta} )^T ( \texttt{src} - \texttt{delta} )\f] if aTa=true , and @@ -1605,9 +1683,9 @@ CV_EXPORTS_W void mulTransposed( InputArray src, OutputArray dst, bool aTa, /** @brief Transposes a matrix. -The function transpose transposes the matrix src : +The function cv::transpose transposes the matrix src : \f[\texttt{dst} (i,j) = \texttt{src} (j,i)\f] -@note No complex conjugation is done in case of a complex matrix. It it +@note No complex conjugation is done in case of a complex matrix. It should be done separately if needed. @param src input array. @param dst output array of the same type as src. @@ -1616,7 +1694,7 @@ CV_EXPORTS_W void transpose(InputArray src, OutputArray dst); /** @brief Performs the matrix transformation of every array element. -The function transform performs the matrix transformation of every +The function cv::transform performs the matrix transformation of every element of the array src and stores the results in dst : \f[\texttt{dst} (I) = \texttt{m} \cdot \texttt{src} (I)\f] (when m.cols=src.channels() ), or @@ -1636,13 +1714,13 @@ m.cols or m.cols-1. @param dst output array of the same size and depth as src; it has as many channels as m.rows. @param m transformation 2x2 or 2x3 floating-point matrix. -@sa perspectiveTransform, getAffineTransform, estimateRigidTransform, warpAffine, warpPerspective +@sa perspectiveTransform, getAffineTransform, estimateAffine2D, warpAffine, warpPerspective */ CV_EXPORTS_W void transform(InputArray src, OutputArray dst, InputArray m ); /** @brief Performs the perspective matrix transformation of vectors. -The function perspectiveTransform transforms every element of src by +The function cv::perspectiveTransform transforms every element of src by treating it as a 2D or 3D vector, in the following way: \f[(x, y, z) \rightarrow (x'/w, y'/w, z'/w)\f] where @@ -1667,24 +1745,25 @@ element is a 2D/3D vector to be transformed. */ CV_EXPORTS_W void perspectiveTransform(InputArray src, OutputArray dst, InputArray m ); -/** @brief Copies the lower or the upper half of a square matrix to another half. +/** @brief Copies the lower or the upper half of a square matrix to its another half. -The function completeSymm copies the lower half of a square matrix to +The function cv::completeSymm copies the lower or the upper half of a square matrix to its another half. The matrix diagonal remains unchanged: -* \f$\texttt{mtx}_{ij}=\texttt{mtx}_{ji}\f$ for \f$i > j\f$ if + - \f$\texttt{m}_{ij}=\texttt{m}_{ji}\f$ for \f$i > j\f$ if lowerToUpper=false -* \f$\texttt{mtx}_{ij}=\texttt{mtx}_{ji}\f$ for \f$i < j\f$ if + - \f$\texttt{m}_{ij}=\texttt{m}_{ji}\f$ for \f$i < j\f$ if lowerToUpper=true -@param mtx input-output floating-point square matrix. + +@param m input-output floating-point square matrix. @param lowerToUpper operation flag; if true, the lower half is copied to the upper half. Otherwise, the upper half is copied to the lower half. @sa flip, transpose */ -CV_EXPORTS_W void completeSymm(InputOutputArray mtx, bool lowerToUpper = false); +CV_EXPORTS_W void completeSymm(InputOutputArray m, bool lowerToUpper = false); /** @brief Initializes a scaled identity matrix. -The function setIdentity initializes a scaled identity matrix: +The function cv::setIdentity initializes a scaled identity matrix: \f[\texttt{mtx} (i,j)= \fork{\texttt{value}}{ if \(i=j\)}{0}{otherwise}\f] The function can also be emulated using the matrix initializers and the @@ -1701,7 +1780,7 @@ CV_EXPORTS_W void setIdentity(InputOutputArray mtx, const Scalar& s = Scalar(1)) /** @brief Returns the determinant of a square floating-point matrix. -The function determinant calculates and returns the determinant of the +The function cv::determinant calculates and returns the determinant of the specified matrix. For small matrices ( mtx.cols=mtx.rows\<=3 ), the direct method is used. For larger matrices, the function uses LU factorization with partial pivoting. @@ -1716,7 +1795,7 @@ CV_EXPORTS_W double determinant(InputArray mtx); /** @brief Returns the trace of a matrix. -The function trace returns the sum of the diagonal elements of the +The function cv::trace returns the sum of the diagonal elements of the matrix mtx . \f[\mathrm{tr} ( \texttt{mtx} ) = \sum _i \texttt{mtx} (i,i)\f] @param mtx input matrix. @@ -1725,20 +1804,20 @@ CV_EXPORTS_W Scalar trace(InputArray mtx); /** @brief Finds the inverse or pseudo-inverse of a matrix. -The function invert inverts the matrix src and stores the result in dst +The function cv::invert inverts the matrix src and stores the result in dst . When the matrix src is singular or non-square, the function calculates the pseudo-inverse matrix (the dst matrix) so that norm(src\*dst - I) is minimal, where I is an identity matrix. -In case of the DECOMP_LU method, the function returns non-zero value if +In case of the #DECOMP_LU method, the function returns non-zero value if the inverse has been successfully calculated and 0 if src is singular. -In case of the DECOMP_SVD method, the function returns the inverse +In case of the #DECOMP_SVD method, the function returns the inverse condition number of src (the ratio of the smallest singular value to the largest singular value) and 0 if src is singular. The SVD method calculates a pseudo-inverse matrix if src is singular. -Similarly to DECOMP_LU, the method DECOMP_CHOLESKY works only with +Similarly to #DECOMP_LU, the method #DECOMP_CHOLESKY works only with non-singular square matrices that should also be symmetrical and positively defined. In this case, the function stores the inverted matrix in dst and returns non-zero. Otherwise, it returns 0. @@ -1752,12 +1831,12 @@ CV_EXPORTS_W double invert(InputArray src, OutputArray dst, int flags = DECOMP_L /** @brief Solves one or more linear systems or least-squares problems. -The function solve solves a linear system or least-squares problem (the +The function cv::solve solves a linear system or least-squares problem (the latter is possible with SVD or QR methods, or by specifying the flag -DECOMP_NORMAL ): +#DECOMP_NORMAL ): \f[\texttt{dst} = \arg \min _X \| \texttt{src1} \cdot \texttt{X} - \texttt{src2} \|\f] -If DECOMP_LU or DECOMP_CHOLESKY method is used, the function returns 1 +If #DECOMP_LU or #DECOMP_CHOLESKY method is used, the function returns 1 if src1 (or \f$\texttt{src1}^T\texttt{src1}\f$ ) is non-singular. Otherwise, it returns 0. In the latter case, dst is not valid. Other methods find a pseudo-solution in case of a singular left-hand side part. @@ -1769,7 +1848,7 @@ will not do the work. Use SVD::solveZ instead. @param src1 input matrix on the left-hand side of the system. @param src2 input matrix on the right-hand side of the system. @param dst output solution. -@param flags solution (matrix inversion) method (cv::DecompTypes) +@param flags solution (matrix inversion) method (#DecompTypes) @sa invert, SVD, eigen */ CV_EXPORTS_W bool solve(InputArray src1, InputArray src2, @@ -1777,7 +1856,7 @@ CV_EXPORTS_W bool solve(InputArray src1, InputArray src2, /** @brief Sorts each row or each column of a matrix. -The function sort sorts each matrix row or each matrix column in +The function cv::sort sorts each matrix row or each matrix column in ascending or descending order. So you should pass two operation flags to get desired behaviour. If you want to sort matrix rows or columns lexicographically, you can use STL std::sort generic function with the @@ -1785,14 +1864,14 @@ proper comparison predicate. @param src input single-channel array. @param dst output array of the same size and type as src. -@param flags operation flags, a combination of cv::SortFlags +@param flags operation flags, a combination of #SortFlags @sa sortIdx, randShuffle */ CV_EXPORTS_W void sort(InputArray src, OutputArray dst, int flags); /** @brief Sorts each row or each column of a matrix. -The function sortIdx sorts each matrix row or each matrix column in the +The function cv::sortIdx sorts each matrix row or each matrix column in the ascending or descending order. So you should pass two operation flags to get desired behaviour. Instead of reordering the elements themselves, it stores the indices of sorted elements in the output array. For example: @@ -1821,12 +1900,13 @@ The function solveCubic finds the real roots of a cubic equation: The roots are stored in the roots array. @param coeffs equation coefficients, an array of 3 or 4 elements. @param roots output array of real roots that has 1 or 3 elements. +@return number of real roots. It can be 0, 1 or 2. */ CV_EXPORTS_W int solveCubic(InputArray coeffs, OutputArray roots); /** @brief Finds the real or complex roots of a polynomial equation. -The function solvePoly finds real and complex roots of a polynomial equation: +The function cv::solvePoly finds real and complex roots of a polynomial equation: \f[\texttt{coeffs} [n] x^{n} + \texttt{coeffs} [n-1] x^{n-1} + ... + \texttt{coeffs} [1] x + \texttt{coeffs} [0] = 0\f] @param coeffs array of polynomial coefficients. @param roots output (complex) array of roots. @@ -1836,13 +1916,14 @@ CV_EXPORTS_W double solvePoly(InputArray coeffs, OutputArray roots, int maxIters /** @brief Calculates eigenvalues and eigenvectors of a symmetric matrix. -The functions eigen calculate just eigenvalues, or eigenvalues and eigenvectors of the symmetric +The function cv::eigen calculates just eigenvalues, or eigenvalues and eigenvectors of the symmetric matrix src: @code src*eigenvectors.row(i).t() = eigenvalues.at(i)*eigenvectors.row(i).t() @endcode -@note in the new and the old interfaces different ordering of eigenvalues and eigenvectors -parameters is used. + +@note Use cv::eigenNonSymmetric for calculation of real eigenvalues and eigenvectors of non-symmetric matrix. + @param src input matrix that must have CV_32FC1 or CV_64FC1 type, square size and be symmetrical (src ^T^ == src). @param eigenvalues output vector of eigenvalues of the same type as src; the eigenvalues are stored @@ -1850,20 +1931,37 @@ in the descending order. @param eigenvectors output matrix of eigenvectors; it has the same size and type as src; the eigenvectors are stored as subsequent matrix rows, in the same order as the corresponding eigenvalues. -@sa completeSymm , PCA +@sa eigenNonSymmetric, completeSymm , PCA */ CV_EXPORTS_W bool eigen(InputArray src, OutputArray eigenvalues, OutputArray eigenvectors = noArray()); +/** @brief Calculates eigenvalues and eigenvectors of a non-symmetric matrix (real eigenvalues only). + +@note Assumes real eigenvalues. + +The function calculates eigenvalues and eigenvectors (optional) of the square matrix src: +@code + src*eigenvectors.row(i).t() = eigenvalues.at(i)*eigenvectors.row(i).t() +@endcode + +@param src input matrix (CV_32FC1 or CV_64FC1 type). +@param eigenvalues output vector of eigenvalues (type is the same type as src). +@param eigenvectors output matrix of eigenvectors (type is the same type as src). The eigenvectors are stored as subsequent matrix rows, in the same order as the corresponding eigenvalues. +@sa eigen +*/ +CV_EXPORTS_W void eigenNonSymmetric(InputArray src, OutputArray eigenvalues, + OutputArray eigenvectors); + /** @brief Calculates the covariance matrix of a set of vectors. -The functions calcCovarMatrix calculate the covariance matrix and, optionally, the mean vector of +The function cv::calcCovarMatrix calculates the covariance matrix and, optionally, the mean vector of the set of input vectors. @param samples samples stored as separate matrices @param nsamples number of samples @param covar output covariance matrix of the type ctype and square size. @param mean input or output (depending on the flags) array as the average value of the input vectors. -@param flags operation flags as a combination of cv::CovarFlags +@param flags operation flags as a combination of #CovarFlags @param ctype type of the matrixl; it equals 'CV_64F' by default. @sa PCA, mulTransposed, Mahalanobis @todo InputArrayOfArrays @@ -1872,11 +1970,11 @@ CV_EXPORTS void calcCovarMatrix( const Mat* samples, int nsamples, Mat& covar, M int flags, int ctype = CV_64F); /** @overload -@note use cv::COVAR_ROWS or cv::COVAR_COLS flag +@note use #COVAR_ROWS or #COVAR_COLS flag @param samples samples stored as rows/columns of a single matrix. @param covar output covariance matrix of the type ctype and square size. @param mean input or output (depending on the flags) array as the average value of the input vectors. -@param flags operation flags as a combination of cv::CovarFlags +@param flags operation flags as a combination of #CovarFlags @param ctype type of the matrixl; it equals 'CV_64F' by default. */ CV_EXPORTS_W void calcCovarMatrix( InputArray samples, OutputArray covar, @@ -1886,10 +1984,20 @@ CV_EXPORTS_W void calcCovarMatrix( InputArray samples, OutputArray covar, CV_EXPORTS_W void PCACompute(InputArray data, InputOutputArray mean, OutputArray eigenvectors, int maxComponents = 0); +/** wrap PCA::operator() and add eigenvalues output parameter */ +CV_EXPORTS_AS(PCACompute2) void PCACompute(InputArray data, InputOutputArray mean, + OutputArray eigenvectors, OutputArray eigenvalues, + int maxComponents = 0); + /** wrap PCA::operator() */ CV_EXPORTS_W void PCACompute(InputArray data, InputOutputArray mean, OutputArray eigenvectors, double retainedVariance); +/** wrap PCA::operator() and add eigenvalues output parameter */ +CV_EXPORTS_AS(PCACompute2) void PCACompute(InputArray data, InputOutputArray mean, + OutputArray eigenvectors, OutputArray eigenvalues, + double retainedVariance); + /** wrap PCA::project */ CV_EXPORTS_W void PCAProject(InputArray data, InputArray mean, InputArray eigenvectors, OutputArray result); @@ -1907,10 +2015,10 @@ CV_EXPORTS_W void SVBackSubst( InputArray w, InputArray u, InputArray vt, /** @brief Calculates the Mahalanobis distance between two vectors. -The function Mahalanobis calculates and returns the weighted distance between two vectors: +The function cv::Mahalanobis calculates and returns the weighted distance between two vectors: \f[d( \texttt{vec1} , \texttt{vec2} )= \sqrt{\sum_{i,j}{\texttt{icovar(i,j)}\cdot(\texttt{vec1}(I)-\texttt{vec2}(I))\cdot(\texttt{vec1(j)}-\texttt{vec2(j)})} }\f] -The covariance matrix may be calculated using the cv::calcCovarMatrix function and then inverted using -the invert function (preferably using the cv::DECOMP_SVD method, as the most accurate). +The covariance matrix may be calculated using the #calcCovarMatrix function and then inverted using +the invert function (preferably using the #DECOMP_SVD method, as the most accurate). @param v1 first 1D input vector. @param v2 second 1D input vector. @param icovar inverse covariance matrix. @@ -1919,7 +2027,7 @@ CV_EXPORTS_W double Mahalanobis(InputArray v1, InputArray v2, InputArray icovar) /** @brief Performs a forward or inverse Discrete Fourier transform of a 1D or 2D floating-point array. -The function performs one of the following: +The function cv::dft performs one of the following: - Forward the Fourier transform of a 1D vector of N elements: \f[Y = F^{(N)} \cdot X,\f] where \f$F^{(N)}_{jk}=\exp(-2\pi i j k/N)\f$ and \f$i=\sqrt{-1}\f$ @@ -1940,28 +2048,28 @@ is how 2D *CCS* spectrum looks: In case of 1D transform of a real vector, the output looks like the first row of the matrix above. So, the function chooses an operation mode depending on the flags and size of the input array: -- If DFT_ROWS is set or the input array has a single row or single column, the function - performs a 1D forward or inverse transform of each row of a matrix when DFT_ROWS is set. +- If #DFT_ROWS is set or the input array has a single row or single column, the function + performs a 1D forward or inverse transform of each row of a matrix when #DFT_ROWS is set. Otherwise, it performs a 2D transform. -- If the input array is real and DFT_INVERSE is not set, the function performs a forward 1D or +- If the input array is real and #DFT_INVERSE is not set, the function performs a forward 1D or 2D transform: - - When DFT_COMPLEX_OUTPUT is set, the output is a complex matrix of the same size as + - When #DFT_COMPLEX_OUTPUT is set, the output is a complex matrix of the same size as input. - - When DFT_COMPLEX_OUTPUT is not set, the output is a real matrix of the same size as + - When #DFT_COMPLEX_OUTPUT is not set, the output is a real matrix of the same size as input. In case of 2D transform, it uses the packed format as shown above. In case of a single 1D transform, it looks like the first row of the matrix above. In case of - multiple 1D transforms (when using the DFT_ROWS flag), each row of the output matrix + multiple 1D transforms (when using the #DFT_ROWS flag), each row of the output matrix looks like the first row of the matrix above. -- If the input array is complex and either DFT_INVERSE or DFT_REAL_OUTPUT are not set, the +- If the input array is complex and either #DFT_INVERSE or #DFT_REAL_OUTPUT are not set, the output is a complex array of the same size as input. The function performs a forward or inverse 1D or 2D transform of the whole input array or each row of the input array independently, depending on the flags DFT_INVERSE and DFT_ROWS. -- When DFT_INVERSE is set and the input array is real, or it is complex but DFT_REAL_OUTPUT +- When #DFT_INVERSE is set and the input array is real, or it is complex but #DFT_REAL_OUTPUT is set, the output is a real array of the same size as input. The function performs a 1D or 2D inverse transformation of the whole input array or each individual row, depending on the flags - DFT_INVERSE and DFT_ROWS. + #DFT_INVERSE and #DFT_ROWS. -If DFT_SCALE is set, the scaling is done after the transformation. +If #DFT_SCALE is set, the scaling is done after the transformation. Unlike dct , the function supports arrays of arbitrary size. But only those arrays are processed efficiently, whose sizes can be factorized in a product of small prime numbers (2, 3, and 5 in the @@ -2027,7 +2135,7 @@ To optimize this sample, consider the following approaches: - If different tiles in C can be calculated in parallel and, thus, the convolution is done by parts, the loop can be threaded. -All of the above improvements have been implemented in matchTemplate and filter2D . Therefore, by +All of the above improvements have been implemented in #matchTemplate and #filter2D . Therefore, by using them, you can get the performance even better than with the above theoretically optimal implementation. Though, those two functions actually calculate cross-correlation, not convolution, so you need to "flip" the second convolution operand B vertically and horizontally using flip . @@ -2040,10 +2148,10 @@ so you need to "flip" the second convolution operand B vertically and horizontal opencv_source/samples/python/dft.py @param src input array that could be real or complex. @param dst output array whose size and type depends on the flags . -@param flags transformation flags, representing a combination of the cv::DftFlags +@param flags transformation flags, representing a combination of the #DftFlags @param nonzeroRows when the parameter is not zero, the function assumes that only the first -nonzeroRows rows of the input array (DFT_INVERSE is not set) or only the first nonzeroRows of the -output array (DFT_INVERSE is set) contain non-zeros, thus, the function can handle the rest of the +nonzeroRows rows of the input array (#DFT_INVERSE is not set) or only the first nonzeroRows of the +output array (#DFT_INVERSE is set) contain non-zeros, thus, the function can handle the rest of the rows more efficiently and save some time; this technique is very useful for calculating array cross-correlation or convolution using DFT. @sa dct , getOptimalDFTSize , mulSpectrums, filter2D , matchTemplate , flip , cartToPolar , @@ -2053,13 +2161,13 @@ CV_EXPORTS_W void dft(InputArray src, OutputArray dst, int flags = 0, int nonzer /** @brief Calculates the inverse Discrete Fourier Transform of a 1D or 2D array. -idft(src, dst, flags) is equivalent to dft(src, dst, flags | DFT_INVERSE) . -@note None of dft and idft scales the result by default. So, you should pass DFT_SCALE to one of +idft(src, dst, flags) is equivalent to dft(src, dst, flags | #DFT_INVERSE) . +@note None of dft and idft scales the result by default. So, you should pass #DFT_SCALE to one of dft or idft explicitly to make these transforms mutually inverse. @sa dft, dct, idct, mulSpectrums, getOptimalDFTSize @param src input floating-point real or complex array. @param dst output array whose size and type depend on the flags. -@param flags operation flags (see dft and cv::DftFlags). +@param flags operation flags (see dft and #DftFlags). @param nonzeroRows number of dst rows to process; the rest of the rows have undefined content (see the convolution sample in dft description. */ @@ -2067,7 +2175,7 @@ CV_EXPORTS_W void idft(InputArray src, OutputArray dst, int flags = 0, int nonze /** @brief Performs a forward or inverse discrete Cosine transform of 1D or 2D array. -The function dct performs a forward or inverse discrete Cosine transform (DCT) of a 1D or 2D +The function cv::dct performs a forward or inverse discrete Cosine transform (DCT) of a 1D or 2D floating-point array: - Forward Cosine transform of a 1D vector of N elements: \f[Y = C^{(N)} \cdot X\f] @@ -2084,9 +2192,9 @@ floating-point array: \f[X = \left (C^{(N)} \right )^T \cdot X \cdot C^{(N)}\f] The function chooses the mode of operation by looking at the flags and size of the input array: -- If (flags & DCT_INVERSE) == 0 , the function does a forward 1D or 2D transform. Otherwise, it +- If (flags & #DCT_INVERSE) == 0 , the function does a forward 1D or 2D transform. Otherwise, it is an inverse 1D or 2D transform. -- If (flags & DCT_ROWS) != 0 , the function performs a 1D transform of each row. +- If (flags & #DCT_ROWS) != 0 , the function performs a 1D transform of each row. - If the array is a single column or a single row, the function performs a 1D transform. - If none of the above is true, the function performs a 2D transform. @@ -2118,7 +2226,7 @@ CV_EXPORTS_W void idct(InputArray src, OutputArray dst, int flags = 0); /** @brief Performs the per-element multiplication of two Fourier spectrums. -The function mulSpectrums performs the per-element multiplication of the two CCS-packed or complex +The function cv::mulSpectrums performs the per-element multiplication of the two CCS-packed or complex matrices that are results of a real or complex Fourier transform. The function, together with dft and idft , may be used to calculate convolution (pass conjB=false ) @@ -2145,7 +2253,7 @@ original one. Arrays whose size is a power-of-two (2, 4, 8, 16, 32, ...) are the Though, the arrays whose size is a product of 2's, 3's, and 5's (for example, 300 = 5\*5\*3\*2\*2) are also processed quite efficiently. -The function getOptimalDFTSize returns the minimum number N that is greater than or equal to vecsize +The function cv::getOptimalDFTSize returns the minimum number N that is greater than or equal to vecsize so that the DFT of a vector of size N can be processed efficiently. In the current implementation N = 2 ^p^ \* 3 ^q^ \* 5 ^r^ for some integer p, q, r. @@ -2161,7 +2269,7 @@ CV_EXPORTS_W int getOptimalDFTSize(int vecsize); /** @brief Returns the default random number generator. -The function theRNG returns the default random number generator. For each thread, there is a +The function cv::theRNG returns the default random number generator. For each thread, there is a separate random number generator, so you can use the function safely in multi-thread environments. If you just need to get a single random number using this generator or initialize an array, you can use randu or randn instead. But if you are going to generate many random numbers inside a loop, it @@ -2170,6 +2278,14 @@ is much faster to use this function to retrieve the generator and then use RNG:: */ CV_EXPORTS RNG& theRNG(); +/** @brief Sets state of default random number generator. + +The function cv::setRNGSeed sets state of default random number generator to custom value. +@param seed new state for default random number generator +@sa RNG, randu, randn +*/ +CV_EXPORTS_W void setRNGSeed(int seed); + /** @brief Generates a single uniformly-distributed random number or an array of random numbers. Non-template variant of the function fills the matrix dst with uniformly-distributed @@ -2184,7 +2300,7 @@ CV_EXPORTS_W void randu(InputOutputArray dst, InputArray low, InputArray high); /** @brief Fills the array with normally distributed random numbers. -The function randn fills the matrix dst with normally distributed random numbers with the specified +The function cv::randn fills the matrix dst with normally distributed random numbers with the specified mean vector and the standard deviation matrix. The generated random numbers are clipped to fit the value range of the output array data type. @param dst output array of random numbers; the array must be pre-allocated and have 1 to 4 channels. @@ -2197,7 +2313,7 @@ CV_EXPORTS_W void randn(InputOutputArray dst, InputArray mean, InputArray stddev /** @brief Shuffles the array elements randomly. -The function randShuffle shuffles the specified 1D array by randomly choosing pairs of elements and +The function cv::randShuffle shuffles the specified 1D array by randomly choosing pairs of elements and swapping them. The number of such swap operations will be dst.rows\*dst.cols\*iterFactor . @param dst input/output numerical 1D array. @param iterFactor scale factor that determines the number of random swap operations (see the details @@ -2316,11 +2432,11 @@ class CV_EXPORTS PCA The operator performs %PCA of the supplied dataset. It is safe to reuse the same PCA structure for multiple datasets. That is, if the structure has been previously used with another dataset, the existing internal - data is reclaimed and the new eigenvalues, @ref eigenvectors , and @ref + data is reclaimed and the new @ref eigenvalues, @ref eigenvectors and @ref mean are allocated and computed. - The computed eigenvalues are sorted from the largest to the smallest and - the corresponding eigenvectors are stored as eigenvectors rows. + The computed @ref eigenvalues are sorted from the largest to the smallest and + the corresponding @ref eigenvectors are stored as eigenvectors rows. @param data input samples stored as the matrix rows or as the matrix columns. @@ -2400,25 +2516,35 @@ class CV_EXPORTS PCA */ void backProject(InputArray vec, OutputArray result) const; - /** @brief write and load PCA matrix + /** @brief write PCA objects -*/ - void write(FileStorage& fs ) const; - void read(const FileNode& fs); + Writes @ref eigenvalues @ref eigenvectors and @ref mean to specified FileStorage + */ + void write(FileStorage& fs) const; + + /** @brief load PCA objects + + Loads @ref eigenvalues @ref eigenvectors and @ref mean from specified FileNode + */ + void read(const FileNode& fn); Mat eigenvectors; //!< eigenvectors of the covariation matrix Mat eigenvalues; //!< eigenvalues of the covariation matrix Mat mean; //!< mean value subtracted before the projection and added after the back projection }; -/** @example pca.cpp - An example using %PCA for dimensionality reduction while maintaining an amount of variance - */ +/** @example samples/cpp/pca.cpp +An example using %PCA for dimensionality reduction while maintaining an amount of variance +*/ + +/** @example samples/cpp/tutorial_code/ml/introduction_to_pca/introduction_to_pca.cpp +Check @ref tutorial_introduction_to_pca "the corresponding tutorial" for more details +*/ /** - @brief Linear Discriminant Analysis - @todo document this class - */ +@brief Linear Discriminant Analysis +@todo document this class +*/ class CV_EXPORTS LDA { public: @@ -2480,7 +2606,6 @@ class CV_EXPORTS LDA static Mat subspaceReconstruct(InputArray W, InputArray mean, InputArray src); protected: - bool _dataAsRow; // unused, but needed for 3.0 ABI compatibility. int _num_components; Mat _eigenvectors; Mat _eigenvalues; @@ -2525,7 +2650,7 @@ class CV_EXPORTS SVD /** @overload initializes an empty SVD structure and then calls SVD::operator() - @param src decomposed matrix. + @param src decomposed matrix. The depth has to be CV_32F or CV_64F. @param flags operation flags (SVD::Flags) */ SVD( InputArray src, int flags = 0 ); @@ -2538,7 +2663,7 @@ class CV_EXPORTS SVD different matrices. Each time, if needed, the previous u,`vt` , and w are reclaimed and the new matrices are created, which is all handled by Mat::create. - @param src decomposed matrix. + @param src decomposed matrix. The depth has to be CV_32F or CV_64F. @param flags operation flags (SVD::Flags) */ SVD& operator ()( InputArray src, int flags = 0 ); @@ -2554,18 +2679,18 @@ class CV_EXPORTS SVD SVD::compute(A, w, u, vt); @endcode - @param src decomposed matrix + @param src decomposed matrix. The depth has to be CV_32F or CV_64F. @param w calculated singular values @param u calculated left singular vectors - @param vt transposed matrix of right singular values - @param flags operation flags - see SVD::SVD. + @param vt transposed matrix of right singular vectors + @param flags operation flags - see SVD::Flags. */ static void compute( InputArray src, OutputArray w, OutputArray u, OutputArray vt, int flags = 0 ); /** @overload computes singular values of a matrix - @param src decomposed matrix + @param src decomposed matrix. The depth has to be CV_32F or CV_64F. @param w calculated singular values @param flags operation flags - see SVD::Flags. */ @@ -2609,7 +2734,7 @@ class CV_EXPORTS SVD if you need to solve many linear systems with the same left-hand side (for example, src ). If all you need is to solve a single system (possibly with multiple rhs immediately available), simply call solve - add pass DECOMP_SVD there. It does absolutely the same thing. + add pass #DECOMP_SVD there. It does absolutely the same thing. */ void backSubst( InputArray rhs, OutputArray dst ) const; @@ -2716,7 +2841,7 @@ class CV_EXPORTS RNG double a1 = rng.uniform((double)0, (double)1); // produces float from [0, 1) - double b = rng.uniform(0.f, 1.f); + float b = rng.uniform(0.f, 1.f); // produces double from [0, 1) double c = rng.uniform(0., 1.); @@ -2732,9 +2857,9 @@ class CV_EXPORTS RNG want a floating-point random number, but the range boundaries are integer numbers, either put dots in the end, if they are constants, or use explicit type cast operators, as in the a1 initialization above. - @param a lower inclusive boundary of the returned random numbers. - @param b upper non-inclusive boundary of the returned random numbers. - */ + @param a lower inclusive boundary of the returned random number. + @param b upper non-inclusive boundary of the returned random number. + */ int uniform(int a, int b); /** @overload */ float uniform(float a, float b); @@ -2788,13 +2913,15 @@ class CV_EXPORTS RNG double gaussian(double sigma); uint64 state; + + bool operator ==(const RNG& other) const; }; /** @brief Mersenne Twister random number generator Inspired by http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c @todo document - */ +*/ class CV_EXPORTS RNG_MT19937 { public: @@ -2812,17 +2939,11 @@ class CV_EXPORTS RNG_MT19937 unsigned operator ()(unsigned N); unsigned operator ()(); - /** @brief returns uniformly distributed integer random number from [a,b) range - -*/ + /** @brief returns uniformly distributed integer random number from [a,b) range*/ int uniform(int a, int b); - /** @brief returns uniformly distributed floating-point random number from [a,b) range - -*/ + /** @brief returns uniformly distributed floating-point random number from [a,b) range*/ float uniform(float a, float b); - /** @brief returns uniformly distributed double-precision floating-point random number from [a,b) range - -*/ + /** @brief returns uniformly distributed double-precision floating-point random number from [a,b) range*/ double uniform(double a, double b); private: @@ -2836,14 +2957,14 @@ class CV_EXPORTS RNG_MT19937 //! @addtogroup core_cluster //! @{ -/** @example kmeans.cpp - An example on K-means clustering +/** @example samples/cpp/kmeans.cpp +An example on K-means clustering */ /** @brief Finds centers of clusters and groups input samples around the clusters. The function kmeans implements a k-means algorithm that finds the centers of cluster_count clusters -and groups the input samples around the clusters. As an output, \f$\texttt{labels}_i\f$ contains a +and groups the input samples around the clusters. As an output, \f$\texttt{bestLabels}_i\f$ contains a 0-based cluster index for the sample stored in the \f$i^{th}\f$ row of the samples matrix. @note @@ -2870,7 +2991,7 @@ function parameter). after every attempt. The best (minimum) value is chosen and the corresponding labels and the compactness value are returned by the function. Basically, you can use only the core of the function, set the number of attempts to 1, initialize labels each time using a custom algorithm, -pass them with the ( flags = KMEANS_USE_INITIAL_LABELS ) flag, and then choose the best +pass them with the ( flags = #KMEANS_USE_INITIAL_LABELS ) flag, and then choose the best (most-compact) clustering. */ CV_EXPORTS_W double kmeans( InputArray data, int K, InputOutputArray bestLabels, @@ -2897,7 +3018,8 @@ class CV_EXPORTS Formatted class CV_EXPORTS Formatter { public: - enum { FMT_DEFAULT = 0, + enum FormatType { + FMT_DEFAULT = 0, FMT_MATLAB = 1, FMT_CSV = 2, FMT_PYTHON = 3, @@ -2909,11 +3031,12 @@ class CV_EXPORTS Formatter virtual Ptr format(const Mat& mtx) const = 0; + virtual void set16fPrecision(int p = 4) = 0; virtual void set32fPrecision(int p = 8) = 0; virtual void set64fPrecision(int p = 16) = 0; virtual void setMultiline(bool ml = true) = 0; - static Ptr get(int fmt = FMT_DEFAULT); + static Ptr get(Formatter::FormatType fmt = FMT_DEFAULT); }; @@ -2936,7 +3059,7 @@ String& operator << (String& out, const Mat& mtx) class CV_EXPORTS Algorithm; -template struct ParamType {}; +template struct ParamType {}; /** @brief This is a base class for all more or less complex algorithms in OpenCV @@ -2947,32 +3070,9 @@ matching, graph-cut etc.), background subtraction (which can be done using mixtu models, codebook-based algorithm etc.), optical flow (block matching, Lucas-Kanade, Horn-Schunck etc.). -Here is example of SIFT use in your application via Algorithm interface: -@code - #include "opencv2/opencv.hpp" - #include "opencv2/xfeatures2d.hpp" - using namespace cv::xfeatures2d; - - Ptr sift = SIFT::create(); - FileStorage fs("sift_params.xml", FileStorage::READ); - if( fs.isOpened() ) // if we have file with parameters, read them - { - sift->read(fs["sift_params"]); - fs.release(); - } - else // else modify the parameters and store them; user can later edit the file to use different parameters - { - sift->setContrastThreshold(0.01f); // lower the contrast threshold, compared to the default value - { - WriteStructContext ws(fs, "sift_params", CV_NODE_MAP); - sift->write(fs); - } - } - Mat image = imread("myimage.png", 0), descriptors; - vector keypoints; - sift->detectAndCompute(image, noArray(), keypoints, descriptors); -@endcode - */ +Here is example of SimpleBlobDetector use in your application via Algorithm interface: +@snippet snippets/core_various.cpp Algorithm +*/ class CV_EXPORTS_W Algorithm { public: @@ -2985,26 +3085,32 @@ class CV_EXPORTS_W Algorithm /** @brief Stores algorithm parameters in a file storage */ - virtual void write(FileStorage& fs) const { (void)fs; } + virtual void write(FileStorage& fs) const { CV_UNUSED(fs); } + + /** @brief simplified API for language bindings + * @overload + */ + CV_WRAP void write(const Ptr& fs, const String& name = String()) const; /** @brief Reads algorithm parameters from a file storage */ - virtual void read(const FileNode& fn) { (void)fn; } + CV_WRAP virtual void read(const FileNode& fn) { CV_UNUSED(fn); } /** @brief Returns true if the Algorithm is empty (e.g. in the very beginning or after unsuccessful read - */ - virtual bool empty() const { return false; } + */ + CV_WRAP virtual bool empty() const { return false; } /** @brief Reads algorithm from the file node - This is static template method of Algorithm. It's usage is following (in the case of SVM): - @code - Ptr svm = Algorithm::read(fn); - @endcode - In order to make this method work, the derived class must overwrite Algorithm::read(const - FileNode& fn) and also have static create() method without parameters - (or with all the optional parameters) - */ + This is static template method of Algorithm. It's usage is following (in the case of SVM): + @code + cv::FileStorage fsRead("example.xml", FileStorage::READ); + Ptr svm = Algorithm::read(fsRead.root()); + @endcode + In order to make this method work, the derived class must overwrite Algorithm::read(const + FileNode& fn) and also have static create() method without parameters + (or with all the optional parameters) + */ template static Ptr<_Tp> read(const FileNode& fn) { Ptr<_Tp> obj = _Tp::create(); @@ -3014,20 +3120,22 @@ class CV_EXPORTS_W Algorithm /** @brief Loads algorithm from the file - @param filename Name of the file to read. - @param objname The optional name of the node to read (if empty, the first top-level node will be used) + @param filename Name of the file to read. + @param objname The optional name of the node to read (if empty, the first top-level node will be used) - This is static template method of Algorithm. It's usage is following (in the case of SVM): - @code - Ptr svm = Algorithm::load("my_svm_model.xml"); - @endcode - In order to make this method work, the derived class must overwrite Algorithm::read(const - FileNode& fn). - */ + This is static template method of Algorithm. It's usage is following (in the case of SVM): + @code + Ptr svm = Algorithm::load("my_svm_model.xml"); + @endcode + In order to make this method work, the derived class must overwrite Algorithm::read(const + FileNode& fn). + */ template static Ptr<_Tp> load(const String& filename, const String& objname=String()) { FileStorage fs(filename, FileStorage::READ); + CV_Assert(fs.isOpened()); FileNode fn = objname.empty() ? fs.getFirstTopLevelNode() : fs[objname]; + if (fn.empty()) return Ptr<_Tp>(); Ptr<_Tp> obj = _Tp::create(); obj->read(fn); return !obj->empty() ? obj : Ptr<_Tp>(); @@ -3035,14 +3143,14 @@ class CV_EXPORTS_W Algorithm /** @brief Loads algorithm from a String - @param strModel The string variable containing the model you want to load. - @param objname The optional name of the node to read (if empty, the first top-level node will be used) + @param strModel The string variable containing the model you want to load. + @param objname The optional name of the node to read (if empty, the first top-level node will be used) - This is static template method of Algorithm. It's usage is following (in the case of SVM): - @code - Ptr svm = Algorithm::loadFromString(myStringModel); - @endcode - */ + This is static template method of Algorithm. It's usage is following (in the case of SVM): + @code + Ptr svm = Algorithm::loadFromString(myStringModel); + @endcode + */ template static Ptr<_Tp> loadFromString(const String& strModel, const String& objname=String()) { FileStorage fs(strModel, FileStorage::READ + FileStorage::MEMORY); @@ -3053,17 +3161,20 @@ class CV_EXPORTS_W Algorithm } /** Saves the algorithm to a file. - In order to make this method work, the derived class must implement Algorithm::write(FileStorage& fs). */ + In order to make this method work, the derived class must implement Algorithm::write(FileStorage& fs). */ CV_WRAP virtual void save(const String& filename) const; /** Returns the algorithm string identifier. - This string is used as top level xml/yml node tag when the object is saved to a file or string. */ + This string is used as top level xml/yml node tag when the object is saved to a file or string. */ CV_WRAP virtual String getDefaultName() const; + +protected: + void writeFormat(FileStorage& fs) const; }; -struct Param { - enum { INT=0, BOOLEAN=1, REAL=2, STRING=3, MAT=4, MAT_VECTOR=5, ALGORITHM=6, FLOAT=7, - UNSIGNED_INT=8, UINT64=9, UCHAR=11 }; +enum struct Param { + INT=0, BOOLEAN=1, REAL=2, STRING=3, MAT=4, MAT_VECTOR=5, ALGORITHM=6, FLOAT=7, + UNSIGNED_INT=8, UINT64=9, UCHAR=11, SCALAR=12 }; @@ -3073,7 +3184,7 @@ template<> struct ParamType typedef bool const_param_type; typedef bool member_type; - enum { type = Param::BOOLEAN }; + static const Param type = Param::BOOLEAN; }; template<> struct ParamType @@ -3081,7 +3192,7 @@ template<> struct ParamType typedef int const_param_type; typedef int member_type; - enum { type = Param::INT }; + static const Param type = Param::INT; }; template<> struct ParamType @@ -3089,7 +3200,7 @@ template<> struct ParamType typedef double const_param_type; typedef double member_type; - enum { type = Param::REAL }; + static const Param type = Param::REAL; }; template<> struct ParamType @@ -3097,7 +3208,7 @@ template<> struct ParamType typedef const String& const_param_type; typedef String member_type; - enum { type = Param::STRING }; + static const Param type = Param::STRING; }; template<> struct ParamType @@ -3105,7 +3216,7 @@ template<> struct ParamType typedef const Mat& const_param_type; typedef Mat member_type; - enum { type = Param::MAT }; + static const Param type = Param::MAT; }; template<> struct ParamType > @@ -3113,7 +3224,7 @@ template<> struct ParamType > typedef const std::vector& const_param_type; typedef std::vector member_type; - enum { type = Param::MAT_VECTOR }; + static const Param type = Param::MAT_VECTOR; }; template<> struct ParamType @@ -3121,7 +3232,7 @@ template<> struct ParamType typedef const Ptr& const_param_type; typedef Ptr member_type; - enum { type = Param::ALGORITHM }; + static const Param type = Param::ALGORITHM; }; template<> struct ParamType @@ -3129,7 +3240,7 @@ template<> struct ParamType typedef float const_param_type; typedef float member_type; - enum { type = Param::FLOAT }; + static const Param type = Param::FLOAT; }; template<> struct ParamType @@ -3137,7 +3248,7 @@ template<> struct ParamType typedef unsigned const_param_type; typedef unsigned member_type; - enum { type = Param::UNSIGNED_INT }; + static const Param type = Param::UNSIGNED_INT; }; template<> struct ParamType @@ -3145,7 +3256,7 @@ template<> struct ParamType typedef uint64 const_param_type; typedef uint64 member_type; - enum { type = Param::UINT64 }; + static const Param type = Param::UINT64; }; template<> struct ParamType @@ -3153,7 +3264,24 @@ template<> struct ParamType typedef uchar const_param_type; typedef uchar member_type; - enum { type = Param::UCHAR }; + static const Param type = Param::UCHAR; +}; + +template<> struct ParamType +{ + typedef const Scalar& const_param_type; + typedef Scalar member_type; + + static const Param type = Param::SCALAR; +}; + +template +struct ParamType<_Tp, typename std::enable_if< std::is_enum<_Tp>::value >::type> +{ + typedef typename std::underlying_type<_Tp>::type const_param_type; + typedef typename std::underlying_type<_Tp>::type member_type; + + static const Param type = Param::INT; }; //! @} core_basic @@ -3164,5 +3292,6 @@ template<> struct ParamType #include "opencv2/core/cvstd.inl.hpp" #include "opencv2/core/utility.hpp" #include "opencv2/core/optim.hpp" +#include "opencv2/core/ovx.hpp" -#endif /*__OPENCV_CORE_HPP__*/ +#endif /*OPENCV_CORE_HPP*/ diff --git a/IPL/include/opencv/opencv2/core/affine.hpp b/IPL/include/opencv/opencv2/core/affine.hpp index 7f8deb5..7e2ed30 100644 --- a/IPL/include/opencv/opencv2/core/affine.hpp +++ b/IPL/include/opencv/opencv2/core/affine.hpp @@ -41,8 +41,8 @@ // //M*/ -#ifndef __OPENCV_CORE_AFFINE3_HPP__ -#define __OPENCV_CORE_AFFINE3_HPP__ +#ifndef OPENCV_CORE_AFFINE3_HPP +#define OPENCV_CORE_AFFINE3_HPP #ifdef __cplusplus @@ -55,7 +55,72 @@ namespace cv //! @{ /** @brief Affine transform - @todo document + * + * It represents a 4x4 homogeneous transformation matrix \f$T\f$ + * + * \f[T = + * \begin{bmatrix} + * R & t\\ + * 0 & 1\\ + * \end{bmatrix} + * \f] + * + * where \f$R\f$ is a 3x3 rotation matrix and \f$t\f$ is a 3x1 translation vector. + * + * You can specify \f$R\f$ either by a 3x3 rotation matrix or by a 3x1 rotation vector, + * which is converted to a 3x3 rotation matrix by the Rodrigues formula. + * + * To construct a matrix \f$T\f$ representing first rotation around the axis \f$r\f$ with rotation + * angle \f$|r|\f$ in radian (right hand rule) and then translation by the vector \f$t\f$, you can use + * + * @code + * cv::Vec3f r, t; + * cv::Affine3f T(r, t); + * @endcode + * + * If you already have the rotation matrix \f$R\f$, then you can use + * + * @code + * cv::Matx33f R; + * cv::Affine3f T(R, t); + * @endcode + * + * To extract the rotation matrix \f$R\f$ from \f$T\f$, use + * + * @code + * cv::Matx33f R = T.rotation(); + * @endcode + * + * To extract the translation vector \f$t\f$ from \f$T\f$, use + * + * @code + * cv::Vec3f t = T.translation(); + * @endcode + * + * To extract the rotation vector \f$r\f$ from \f$T\f$, use + * + * @code + * cv::Vec3f r = T.rvec(); + * @endcode + * + * Note that since the mapping from rotation vectors to rotation matrices + * is many to one. The returned rotation vector is not necessarily the one + * you used before to set the matrix. + * + * If you have two transformations \f$T = T_1 * T_2\f$, use + * + * @code + * cv::Affine3f T, T1, T2; + * T = T2.concatenate(T1); + * @endcode + * + * To get the inverse transform of \f$T\f$, use + * + * @code + * cv::Affine3f T, T_inv; + * T_inv = T.inv(); + * @endcode + * */ template class Affine3 @@ -66,45 +131,127 @@ namespace cv typedef Matx Mat4; typedef Vec Vec3; + //! Default constructor. It represents a 4x4 identity matrix. Affine3(); //! Augmented affine matrix Affine3(const Mat4& affine); - //! Rotation matrix + /** + * The resulting 4x4 matrix is + * + * \f[ + * \begin{bmatrix} + * R & t\\ + * 0 & 1\\ + * \end{bmatrix} + * \f] + * + * @param R 3x3 rotation matrix. + * @param t 3x1 translation vector. + */ Affine3(const Mat3& R, const Vec3& t = Vec3::all(0)); - //! Rodrigues vector + /** + * Rodrigues vector. + * + * The last row of the current matrix is set to [0,0,0,1]. + * + * @param rvec 3x1 rotation vector. Its direction indicates the rotation axis and its length + * indicates the rotation angle in radian (using right hand rule). + * @param t 3x1 translation vector. + */ Affine3(const Vec3& rvec, const Vec3& t = Vec3::all(0)); - //! Combines all contructors above. Supports 4x4, 4x3, 3x3, 1x3, 3x1 sizes of data matrix + /** + * Combines all constructors above. Supports 4x4, 3x4, 3x3, 1x3, 3x1 sizes of data matrix. + * + * The last row of the current matrix is set to [0,0,0,1] when data is not 4x4. + * + * @param data 1-channel matrix. + * when it is 4x4, it is copied to the current matrix and t is not used. + * When it is 3x4, it is copied to the upper part 3x4 of the current matrix and t is not used. + * When it is 3x3, it is copied to the upper left 3x3 part of the current matrix. + * When it is 3x1 or 1x3, it is treated as a rotation vector and the Rodrigues formula is used + * to compute a 3x3 rotation matrix. + * @param t 3x1 translation vector. It is used only when data is neither 4x4 nor 3x4. + */ explicit Affine3(const Mat& data, const Vec3& t = Vec3::all(0)); - //! From 16th element array + //! From 16-element array explicit Affine3(const float_type* vals); - //! Create identity transform + //! Create an 4x4 identity transform static Affine3 Identity(); - //! Rotation matrix + /** + * Rotation matrix. + * + * Copy the rotation matrix to the upper left 3x3 part of the current matrix. + * The remaining elements of the current matrix are not changed. + * + * @param R 3x3 rotation matrix. + * + */ void rotation(const Mat3& R); - //! Rodrigues vector + /** + * Rodrigues vector. + * + * It sets the upper left 3x3 part of the matrix. The remaining part is unaffected. + * + * @param rvec 3x1 rotation vector. The direction indicates the rotation axis and + * its length indicates the rotation angle in radian (using the right thumb convention). + */ void rotation(const Vec3& rvec); - //! Combines rotation methods above. Suports 3x3, 1x3, 3x1 sizes of data matrix; + /** + * Combines rotation methods above. Supports 3x3, 1x3, 3x1 sizes of data matrix. + * + * It sets the upper left 3x3 part of the matrix. The remaining part is unaffected. + * + * @param data 1-channel matrix. + * When it is a 3x3 matrix, it sets the upper left 3x3 part of the current matrix. + * When it is a 1x3 or 3x1 matrix, it is used as a rotation vector. The Rodrigues formula + * is used to compute the rotation matrix and sets the upper left 3x3 part of the current matrix. + */ void rotation(const Mat& data); + /** + * Copy the 3x3 matrix L to the upper left part of the current matrix + * + * It sets the upper left 3x3 part of the matrix. The remaining part is unaffected. + * + * @param L 3x3 matrix. + */ void linear(const Mat3& L); + + /** + * Copy t to the first three elements of the last column of the current matrix + * + * It sets the upper right 3x1 part of the matrix. The remaining part is unaffected. + * + * @param t 3x1 translation vector. + */ void translation(const Vec3& t); + //! @return the upper left 3x3 part Mat3 rotation() const; + + //! @return the upper left 3x3 part Mat3 linear() const; + + //! @return the upper right 3x1 part Vec3 translation() const; - //! Rodrigues vector + //! Rodrigues vector. + //! @return a vector representing the upper left 3x3 rotation matrix of the current matrix. + //! @warning Since the mapping between rotation vectors and rotation matrices is many to one, + //! this function returns only one rotation vector that represents the current rotation matrix, + //! which is not necessarily the same one set by `rotation(const Vec3& rvec)`. Vec3 rvec() const; + //! @return the inverse of the current matrix. Affine3 inv(int method = cv::DECOMP_SVD) const; //! a.rotate(R) is equivalent to Affine(R, 0) * a; @@ -113,7 +260,7 @@ namespace cv //! a.rotate(rvec) is equivalent to Affine(rvec, 0) * a; Affine3 rotate(const Vec3& rvec) const; - //! a.translate(t) is equivalent to Affine(E, t) * a; + //! a.translate(t) is equivalent to Affine(E, t) * a, where E is an identity matrix Affine3 translate(const Vec3& t) const; //! a.concatenate(affine) is equivalent to affine * a; @@ -136,6 +283,7 @@ namespace cv template static Affine3 operator*(const Affine3& affine1, const Affine3& affine2); + //! V is a 3-element vector with member fields x, y and z template static V operator*(const Affine3& affine, const V& vector); @@ -153,15 +301,24 @@ namespace cv typedef _Tp channel_type; enum { generic_type = 0, - depth = DataType::depth, channels = 16, - fmt = DataType::fmt + ((channels - 1) << 8), - type = CV_MAKETYPE(depth, channels) + fmt = traits::SafeFmt::fmt + ((channels - 1) << 8) +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + ,depth = DataType::depth + ,type = CV_MAKETYPE(depth, channels) +#endif }; typedef Vec vec_type; }; + namespace traits { + template + struct Depth< Affine3<_Tp> > { enum { value = Depth<_Tp>::value }; }; + template + struct Type< Affine3<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 16) }; }; + } // namespace + //! @} core } @@ -169,7 +326,7 @@ namespace cv //! @cond IGNORED /////////////////////////////////////////////////////////////////////////////////// -// Implementaiton +// Implementation template inline cv::Affine3::Affine3() @@ -202,7 +359,8 @@ cv::Affine3::Affine3(const Vec3& _rvec, const Vec3& t) template inline cv::Affine3::Affine3(const cv::Mat& data, const Vec3& t) { - CV_Assert(data.type() == cv::DataType::type); + CV_Assert(data.type() == cv::traits::Type::value); + CV_Assert(data.channels() == 1); if (data.cols == 4 && data.rows == 4) { @@ -213,11 +371,13 @@ cv::Affine3::Affine3(const cv::Mat& data, const Vec3& t) { rotation(data(Rect(0, 0, 3, 3))); translation(data(Rect(3, 0, 1, 3))); - return; + } + else + { + rotation(data); + translation(t); } - rotation(data); - translation(t); matrix.val[12] = matrix.val[13] = matrix.val[14] = 0; matrix.val[15] = 1; } @@ -265,11 +425,12 @@ void cv::Affine3::rotation(const Vec3& _rvec) } } -//Combines rotation methods above. Suports 3x3, 1x3, 3x1 sizes of data matrix; +//Combines rotation methods above. Supports 3x3, 1x3, 3x1 sizes of data matrix; template inline void cv::Affine3::rotation(const cv::Mat& data) { - CV_Assert(data.type() == cv::DataType::type); + CV_Assert(data.type() == cv::traits::Type::value); + CV_Assert(data.channels() == 1); if (data.cols == 3 && data.rows == 3) { @@ -284,7 +445,7 @@ void cv::Affine3::rotation(const cv::Mat& data) rotation(_rvec); } else - CV_Assert(!"Input marix can be 3x3, 1x3 or 3x1"); + CV_Error(Error::StsError, "Input matrix can only be 3x3, 1x3 or 3x1"); } template inline @@ -483,21 +644,21 @@ cv::Vec3d cv::operator*(const cv::Affine3d& affine, const cv::Vec3d& v) template inline cv::Affine3::Affine3(const Eigen::Transform& affine) { - cv::Mat(4, 4, cv::DataType::type, affine.matrix().data()).copyTo(matrix); + cv::Mat(4, 4, cv::traits::Type::value, affine.matrix().data()).copyTo(matrix); } template inline cv::Affine3::Affine3(const Eigen::Transform& affine) { Eigen::Transform a = affine; - cv::Mat(4, 4, cv::DataType::type, a.matrix().data()).copyTo(matrix); + cv::Mat(4, 4, cv::traits::Type::value, a.matrix().data()).copyTo(matrix); } template inline cv::Affine3::operator Eigen::Transform() const { Eigen::Transform r; - cv::Mat hdr(4, 4, cv::DataType::type, r.matrix().data()); + cv::Mat hdr(4, 4, cv::traits::Type::value, r.matrix().data()); cv::Mat(matrix, false).copyTo(hdr); return r; } @@ -514,4 +675,4 @@ cv::Affine3::operator Eigen::Transform() const #endif /* __cplusplus */ -#endif /* __OPENCV_CORE_AFFINE3_HPP__ */ +#endif /* OPENCV_CORE_AFFINE3_HPP */ diff --git a/IPL/include/opencv/opencv2/core/async.hpp b/IPL/include/opencv/opencv2/core/async.hpp new file mode 100644 index 0000000..54560c7 --- /dev/null +++ b/IPL/include/opencv/opencv2/core/async.hpp @@ -0,0 +1,105 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_ASYNC_HPP +#define OPENCV_CORE_ASYNC_HPP + +#include + +#ifdef CV_CXX11 +//#include +#include +#endif + +namespace cv { + +/** @addtogroup core_async + +@{ +*/ + + +/** @brief Returns result of asynchronous operations + +Object has attached asynchronous state. +Assignment operator doesn't clone asynchronous state (it is shared between all instances). + +Result can be fetched via get() method only once. + +*/ +class CV_EXPORTS_W AsyncArray +{ +public: + ~AsyncArray() CV_NOEXCEPT; + CV_WRAP AsyncArray() CV_NOEXCEPT; + AsyncArray(const AsyncArray& o) CV_NOEXCEPT; + AsyncArray& operator=(const AsyncArray& o) CV_NOEXCEPT; + CV_WRAP void release() CV_NOEXCEPT; + + /** Fetch the result. + @param[out] dst destination array + + Waits for result until container has valid result. + Throws exception if exception was stored as a result. + + Throws exception on invalid container state. + + @note Result or stored exception can be fetched only once. + */ + CV_WRAP void get(OutputArray dst) const; + + /** Retrieving the result with timeout + @param[out] dst destination array + @param[in] timeoutNs timeout in nanoseconds, -1 for infinite wait + + @returns true if result is ready, false if the timeout has expired + + @note Result or stored exception can be fetched only once. + */ + bool get(OutputArray dst, int64 timeoutNs) const; + + CV_WRAP inline + bool get(OutputArray dst, double timeoutNs) const { return get(dst, (int64)timeoutNs); } + + bool wait_for(int64 timeoutNs) const; + + CV_WRAP inline + bool wait_for(double timeoutNs) const { return wait_for((int64)timeoutNs); } + + CV_WRAP bool valid() const CV_NOEXCEPT; + +#ifdef CV_CXX11 + inline AsyncArray(AsyncArray&& o) { p = o.p; o.p = NULL; } + inline AsyncArray& operator=(AsyncArray&& o) CV_NOEXCEPT { std::swap(p, o.p); return *this; } + + template + inline bool get(OutputArray dst, const std::chrono::duration<_Rep, _Period>& timeout) + { + return get(dst, (int64)(std::chrono::nanoseconds(timeout).count())); + } + + template + inline bool wait_for(const std::chrono::duration<_Rep, _Period>& timeout) + { + return wait_for((int64)(std::chrono::nanoseconds(timeout).count())); + } + +#if 0 + std::future getFutureMat() const; + std::future getFutureUMat() const; +#endif +#endif + + + // PImpl + struct Impl; friend struct Impl; + inline void* _getImpl() const CV_NOEXCEPT { return p; } +protected: + Impl* p; +}; + + +//! @} +} // namespace +#endif // OPENCV_CORE_ASYNC_HPP diff --git a/IPL/include/opencv/opencv2/core/base.hpp b/IPL/include/opencv/opencv2/core/base.hpp index ed633f5..a3a3e51 100644 --- a/IPL/include/opencv/opencv2/core/base.hpp +++ b/IPL/include/opencv/opencv2/core/base.hpp @@ -42,13 +42,15 @@ // //M*/ -#ifndef __OPENCV_CORE_BASE_HPP__ -#define __OPENCV_CORE_BASE_HPP__ +#ifndef OPENCV_CORE_BASE_HPP +#define OPENCV_CORE_BASE_HPP #ifndef __cplusplus # error base.hpp header must be compiled as C++ #endif +#include "opencv2/opencv_modules.hpp" + #include #include @@ -64,38 +66,38 @@ namespace cv namespace Error { //! error codes enum Code { - StsOk= 0, //!< everithing is ok + StsOk= 0, //!< everything is ok StsBackTrace= -1, //!< pseudo error for back trace StsError= -2, //!< unknown /unspecified error StsInternal= -3, //!< internal error (bad state) StsNoMem= -4, //!< insufficient memory StsBadArg= -5, //!< function arg/param is bad StsBadFunc= -6, //!< unsupported function - StsNoConv= -7, //!< iter. didn't converge + StsNoConv= -7, //!< iteration didn't converge StsAutoTrace= -8, //!< tracing HeaderIsNull= -9, //!< image header is NULL BadImageSize= -10, //!< image size is invalid BadOffset= -11, //!< offset is invalid BadDataPtr= -12, //!< - BadStep= -13, //!< + BadStep= -13, //!< image step is wrong, this may happen for a non-continuous matrix. BadModelOrChSeq= -14, //!< - BadNumChannels= -15, //!< + BadNumChannels= -15, //!< bad number of channels, for example, some functions accept only single channel matrices. BadNumChannel1U= -16, //!< - BadDepth= -17, //!< + BadDepth= -17, //!< input image depth is not supported by the function BadAlphaChannel= -18, //!< - BadOrder= -19, //!< - BadOrigin= -20, //!< - BadAlign= -21, //!< + BadOrder= -19, //!< number of dimensions is out of range + BadOrigin= -20, //!< incorrect input origin + BadAlign= -21, //!< incorrect input align BadCallBack= -22, //!< BadTileSize= -23, //!< - BadCOI= -24, //!< - BadROISize= -25, //!< + BadCOI= -24, //!< input COI is not supported + BadROISize= -25, //!< incorrect input roi MaskIsTiled= -26, //!< StsNullPtr= -27, //!< null pointer StsVecLengthErr= -28, //!< incorrect vector length - StsFilterStructContentErr= -29, //!< incorr. filter structure content - StsKernelStructContentErr= -30, //!< incorr. transform kernel content - StsFilterOffsetErr= -31, //!< incorrect filter ofset value + StsFilterStructContentErr= -29, //!< incorrect filter structure content + StsKernelStructContentErr= -30, //!< incorrect transform kernel content + StsFilterOffsetErr= -31, //!< incorrect filter offset value StsBadSize= -201, //!< the input/output structure size is incorrect StsDivByZero= -202, //!< division by zero StsInplaceNotSupported= -203, //!< in-place operation is not supported @@ -111,13 +113,13 @@ enum Code { StsNotImplemented= -213, //!< the requested function/feature is not implemented StsBadMemBlock= -214, //!< an allocated block has been corrupted StsAssert= -215, //!< assertion failed - GpuNotSupported= -216, - GpuApiCallError= -217, - OpenGlNotSupported= -218, - OpenGlApiCallError= -219, - OpenCLApiCallError= -220, + GpuNotSupported= -216, //!< no CUDA support + GpuApiCallError= -217, //!< GPU API call error + OpenGlNotSupported= -218, //!< no OpenGL support + OpenGlApiCallError= -219, //!< OpenGL API call error + OpenCLApiCallError= -220, //!< OpenCL API call error OpenCLDoubleNotSupported= -221, - OpenCLInitError= -222, + OpenCLInitError= -222, //!< OpenCL initialization error OpenCLNoAMDBlasFft= -223 }; } //Error @@ -150,46 +152,57 @@ enum DecompTypes { }; /** norm types -- For one array: -\f[norm = \forkthree{\|\texttt{src1}\|_{L_{\infty}} = \max _I | \texttt{src1} (I)|}{if \(\texttt{normType} = \texttt{NORM_INF}\) } -{ \| \texttt{src1} \| _{L_1} = \sum _I | \texttt{src1} (I)|}{if \(\texttt{normType} = \texttt{NORM_L1}\) } -{ \| \texttt{src1} \| _{L_2} = \sqrt{\sum_I \texttt{src1}(I)^2} }{if \(\texttt{normType} = \texttt{NORM_L2}\) }\f] - -- Absolute norm for two arrays -\f[norm = \forkthree{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} = \max _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if \(\texttt{normType} = \texttt{NORM_INF}\) } -{ \| \texttt{src1} - \texttt{src2} \| _{L_1} = \sum _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if \(\texttt{normType} = \texttt{NORM_L1}\) } -{ \| \texttt{src1} - \texttt{src2} \| _{L_2} = \sqrt{\sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2} }{if \(\texttt{normType} = \texttt{NORM_L2}\) }\f] - -- Relative norm for two arrays -\f[norm = \forkthree{\frac{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} }{\|\texttt{src2}\|_{L_{\infty}} }}{if \(\texttt{normType} = \texttt{NORM_RELATIVE_INF}\) } -{ \frac{\|\texttt{src1}-\texttt{src2}\|_{L_1} }{\|\texttt{src2}\|_{L_1}} }{if \(\texttt{normType} = \texttt{NORM_RELATIVE_L1}\) } -{ \frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}} }{if \(\texttt{normType} = \texttt{NORM_RELATIVE_L2}\) }\f] - -As example for one array consider the function \f$r(x)= \begin{pmatrix} x \\ 1-x \end{pmatrix}, x \in [-1;1]\f$. -The \f$ L_{1}, L_{2} \f$ and \f$ L_{\infty} \f$ norm for the sample value \f$r(-1) = \begin{pmatrix} -1 \\ 2 \end{pmatrix}\f$ -is calculated as follows -\f{align*} - \| r(-1) \|_{L_1} &= |-1| + |2| = 3 \\ - \| r(-1) \|_{L_2} &= \sqrt{(-1)^{2} + (2)^{2}} = \sqrt{5} \\ - \| r(-1) \|_{L_\infty} &= \max(|-1|,|2|) = 2 -\f} -and for \f$r(0.5) = \begin{pmatrix} 0.5 \\ 0.5 \end{pmatrix}\f$ the calculation is -\f{align*} - \| r(0.5) \|_{L_1} &= |0.5| + |0.5| = 1 \\ - \| r(0.5) \|_{L_2} &= \sqrt{(0.5)^{2} + (0.5)^{2}} = \sqrt{0.5} \\ - \| r(0.5) \|_{L_\infty} &= \max(|0.5|,|0.5|) = 0.5. -\f} -The following graphic shows all values for the three norm functions \f$\| r(x) \|_{L_1}, \| r(x) \|_{L_2}\f$ and \f$\| r(x) \|_{L_\infty}\f$. -It is notable that the \f$ L_{1} \f$ norm forms the upper and the \f$ L_{\infty} \f$ norm forms the lower border for the example function \f$ r(x) \f$. -![Graphs for the different norm functions from the above example](pics/NormTypes_OneArray_1-2-INF.png) - */ -enum NormTypes { NORM_INF = 1, + +src1 and src2 denote input arrays. +*/ + +enum NormTypes { + /** + \f[ + norm = \forkthree + {\|\texttt{src1}\|_{L_{\infty}} = \max _I | \texttt{src1} (I)|}{if \(\texttt{normType} = \texttt{NORM_INF}\) } + {\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} = \max _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if \(\texttt{normType} = \texttt{NORM_INF}\) } + {\frac{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} }{\|\texttt{src2}\|_{L_{\infty}} }}{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_INF}\) } + \f] + */ + NORM_INF = 1, + /** + \f[ + norm = \forkthree + {\| \texttt{src1} \| _{L_1} = \sum _I | \texttt{src1} (I)|}{if \(\texttt{normType} = \texttt{NORM_L1}\)} + { \| \texttt{src1} - \texttt{src2} \| _{L_1} = \sum _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if \(\texttt{normType} = \texttt{NORM_L1}\) } + { \frac{\|\texttt{src1}-\texttt{src2}\|_{L_1} }{\|\texttt{src2}\|_{L_1}} }{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L1}\) } + \f]*/ NORM_L1 = 2, + /** + \f[ + norm = \forkthree + { \| \texttt{src1} \| _{L_2} = \sqrt{\sum_I \texttt{src1}(I)^2} }{if \(\texttt{normType} = \texttt{NORM_L2}\) } + { \| \texttt{src1} - \texttt{src2} \| _{L_2} = \sqrt{\sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2} }{if \(\texttt{normType} = \texttt{NORM_L2}\) } + { \frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}} }{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L2}\) } + \f] + */ NORM_L2 = 4, + /** + \f[ + norm = \forkthree + { \| \texttt{src1} \| _{L_2} ^{2} = \sum_I \texttt{src1}(I)^2} {if \(\texttt{normType} = \texttt{NORM_L2SQR}\)} + { \| \texttt{src1} - \texttt{src2} \| _{L_2} ^{2} = \sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2 }{if \(\texttt{normType} = \texttt{NORM_L2SQR}\) } + { \left(\frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}}\right)^2 }{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L2SQR}\) } + \f] + */ NORM_L2SQR = 5, + /** + In the case of one input array, calculates the Hamming distance of the array from zero, + In the case of two input arrays, calculates the Hamming distance between the arrays. + */ NORM_HAMMING = 6, + /** + Similar to NORM_HAMMING, but in the calculation, each two bits of the input sequence will + be added and treated as a single bit to be used in the same calculation as NORM_HAMMING. + */ NORM_HAMMING2 = 7, - NORM_TYPE_MASK = 7, + NORM_TYPE_MASK = 7, //!< bit-mask which can be used to separate norm type from norm flags NORM_RELATIVE = 8, //!< flag NORM_MINMAX = 32 //!< flag }; @@ -237,6 +250,10 @@ enum DftFlags { into a real array and inverse transformation is executed, the function treats the input as a packed complex-conjugate symmetrical array, and the output will also be a real array). */ DFT_REAL_OUTPUT = 32, + /** specifies that input is complex input. If this flag is set, the input must have 2 channels. + On the other hand, for backwards compatibility reason, if input has 2 channels, input is + already considered complex. */ + DFT_COMPLEX_INPUT = 64, /** performs an inverse 1D or 2D transform instead of the default forward transform. */ DCT_INVERSE = DFT_INVERSE, /** performs a forward or inverse transform of every individual row of the input @@ -254,7 +271,7 @@ enum BorderTypes { BORDER_REFLECT = 2, //!< `fedcba|abcdefgh|hgfedcb` BORDER_WRAP = 3, //!< `cdefgh|abcdefgh|abcdefg` BORDER_REFLECT_101 = 4, //!< `gfedcb|abcdefgh|gfedcba` - BORDER_TRANSPARENT = 5, //!< `uvwxyz|absdefgh|ijklmno` + BORDER_TRANSPARENT = 5, //!< `uvwxyz|abcdefgh|ijklmno` BORDER_REFLECT101 = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101 BORDER_DEFAULT = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101 @@ -266,68 +283,6 @@ enum BorderTypes { //! @addtogroup core_utils //! @{ -//! @cond IGNORED - -//////////////// static assert ///////////////// -#define CVAUX_CONCAT_EXP(a, b) a##b -#define CVAUX_CONCAT(a, b) CVAUX_CONCAT_EXP(a,b) - -#if defined(__clang__) -# ifndef __has_extension -# define __has_extension __has_feature /* compatibility, for older versions of clang */ -# endif -# if __has_extension(cxx_static_assert) -# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition) -# elif __has_extension(c_static_assert) -# define CV_StaticAssert(condition, reason) _Static_assert((condition), reason " " #condition) -# endif -#elif defined(__GNUC__) -# if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) -# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition) -# endif -#elif defined(_MSC_VER) -# if _MSC_VER >= 1600 /* MSVC 10 */ -# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition) -# endif -#endif -#ifndef CV_StaticAssert -# if !defined(__clang__) && defined(__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 302) -# define CV_StaticAssert(condition, reason) ({ extern int __attribute__((error("CV_StaticAssert: " reason " " #condition))) CV_StaticAssert(); ((condition) ? 0 : CV_StaticAssert()); }) -# else - template struct CV_StaticAssert_failed; - template <> struct CV_StaticAssert_failed { enum { val = 1 }; }; - template struct CV_StaticAssert_test {}; -# define CV_StaticAssert(condition, reason)\ - typedef cv::CV_StaticAssert_test< sizeof(cv::CV_StaticAssert_failed< static_cast(condition) >) > CVAUX_CONCAT(CV_StaticAssert_failed_at_, __LINE__) -# endif -#endif - -// Suppress warning "-Wdeprecated-declarations" / C4996 -#if defined(_MSC_VER) - #define CV_DO_PRAGMA(x) __pragma(x) -#elif defined(__GNUC__) - #define CV_DO_PRAGMA(x) _Pragma (#x) -#else - #define CV_DO_PRAGMA(x) -#endif - -#ifdef _MSC_VER -#define CV_SUPPRESS_DEPRECATED_START \ - CV_DO_PRAGMA(warning(push)) \ - CV_DO_PRAGMA(warning(disable: 4996)) -#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(warning(pop)) -#elif defined (__clang__) || ((__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 405)) -#define CV_SUPPRESS_DEPRECATED_START \ - CV_DO_PRAGMA(GCC diagnostic push) \ - CV_DO_PRAGMA(GCC diagnostic ignored "-Wdeprecated-declarations") -#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(GCC diagnostic pop) -#else -#define CV_SUPPRESS_DEPRECATED_START -#define CV_SUPPRESS_DEPRECATED_END -#endif -#define CV_UNUSED(name) (void)name -//! @endcond - /*! @brief Signals an error and raises the exception. By default the function prints information about the error to stderr, @@ -336,44 +291,21 @@ It is possible to alternate error processing by using redirectError(). @param _code - error code (Error::Code) @param _err - error description @param _func - function name. Available only when the compiler supports getting it -@param _file - source file name where the error has occured -@param _line - line number in the source file where the error has occured -@see CV_Error, CV_Error_, CV_ErrorNoReturn, CV_ErrorNoReturn_, CV_Assert, CV_DbgAssert +@param _file - source file name where the error has occurred +@param _line - line number in the source file where the error has occurred +@see CV_Error, CV_Error_, CV_Assert, CV_DbgAssert */ -CV_EXPORTS void error(int _code, const String& _err, const char* _func, const char* _file, int _line); +CV_EXPORTS CV_NORETURN void error(int _code, const String& _err, const char* _func, const char* _file, int _line); -#ifdef __GNUC__ -# if defined __clang__ || defined __APPLE__ -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Winvalid-noreturn" -# endif -#endif +#ifdef CV_STATIC_ANALYSIS -/** same as cv::error, but does not return */ -CV_INLINE CV_NORETURN void errorNoReturn(int _code, const String& _err, const char* _func, const char* _file, int _line) -{ - error(_code, _err, _func, _file, _line); -#ifdef __GNUC__ -# if !defined __clang__ && !defined __APPLE__ - // this suppresses this warning: "noreturn" function does return [enabled by default] - __builtin_trap(); - // or use infinite loop: for (;;) {} -# endif -#endif -} -#ifdef __GNUC__ -# if defined __clang__ || defined __APPLE__ -# pragma GCC diagnostic pop -# endif -#endif +// In practice, some macro are not processed correctly (noreturn is not detected). +// We need to use simplified definition for them. +#define CV_Error(code, msg) do { (void)(code); (void)(msg); abort(); } while (0) +#define CV_Error_(code, args) do { (void)(code); (void)(cv::format args); abort(); } while (0) +#define CV_Assert( expr ) do { if (!(expr)) abort(); } while (0) -#if defined __GNUC__ -#define CV_Func __func__ -#elif defined _MSC_VER -#define CV_Func __FUNCTION__ -#else -#define CV_Func "" -#endif +#else // CV_STATIC_ANALYSIS /** @brief Call the error handler. @@ -393,7 +325,7 @@ This macro can be used to construct an error message on-fly to include some dyna for example: @code // note the extra parentheses around the formatted text message - CV_Error_( CV_StsOutOfRange, + CV_Error_(Error::StsOutOfRange, ("the value at (%d, %d)=%g is out of range", badPt.x, badPt.y, badValue)); @endcode @param code one of Error::Code @@ -407,18 +339,39 @@ The macros CV_Assert (and CV_DbgAssert(expr)) evaluate the specified expression. raise an error (see cv::error). The macro CV_Assert checks the condition in both Debug and Release configurations while CV_DbgAssert is only retained in the Debug configuration. */ -#define CV_Assert( expr ) if(!!(expr)) ; else cv::error( cv::Error::StsAssert, #expr, CV_Func, __FILE__, __LINE__ ) +#define CV_Assert( expr ) do { if(!!(expr)) ; else cv::error( cv::Error::StsAssert, #expr, CV_Func, __FILE__, __LINE__ ); } while(0) -/** same as CV_Error(code,msg), but does not return */ -#define CV_ErrorNoReturn( code, msg ) cv::errorNoReturn( code, msg, CV_Func, __FILE__, __LINE__ ) +#endif // CV_STATIC_ANALYSIS -/** same as CV_Error_(code,args), but does not return */ -#define CV_ErrorNoReturn_( code, args ) cv::errorNoReturn( code, cv::format args, CV_Func, __FILE__, __LINE__ ) +//! @cond IGNORED +#if !defined(__OPENCV_BUILD) // TODO: backward compatibility only +#ifndef CV_ErrorNoReturn +#define CV_ErrorNoReturn CV_Error +#endif +#ifndef CV_ErrorNoReturn_ +#define CV_ErrorNoReturn_ CV_Error_ +#endif +#endif -/** replaced with CV_Assert(expr) in Debug configuration */ -#ifdef _DEBUG +#define CV_Assert_1 CV_Assert +#define CV_Assert_2( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_1( __VA_ARGS__ )) +#define CV_Assert_3( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_2( __VA_ARGS__ )) +#define CV_Assert_4( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_3( __VA_ARGS__ )) +#define CV_Assert_5( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_4( __VA_ARGS__ )) +#define CV_Assert_6( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_5( __VA_ARGS__ )) +#define CV_Assert_7( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_6( __VA_ARGS__ )) +#define CV_Assert_8( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_7( __VA_ARGS__ )) +#define CV_Assert_9( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_8( __VA_ARGS__ )) +#define CV_Assert_10( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_9( __VA_ARGS__ )) + +#define CV_Assert_N(...) do { __CV_EXPAND(__CV_CAT(CV_Assert_, __CV_VA_NUM_ARGS(__VA_ARGS__)) (__VA_ARGS__)); } while(0) + +//! @endcond + +#if defined _DEBUG || defined CV_STATIC_ANALYSIS # define CV_DbgAssert(expr) CV_Assert(expr) #else +/** replaced with CV_Assert(expr) in Debug configuration */ # define CV_DbgAssert(expr) #endif @@ -428,7 +381,7 @@ configurations while CV_DbgAssert is only retained in the Debug configuration. */ struct CV_EXPORTS Hamming { - enum { normType = NORM_HAMMING }; + static const NormTypes normType = NORM_HAMMING; typedef unsigned char ValueType; typedef int ResultType; @@ -665,13 +618,23 @@ namespace cudev namespace ipp { -CV_EXPORTS int getIppFeatures(); -CV_EXPORTS void setIppStatus(int status, const char * const funcname = NULL, const char * const filename = NULL, +CV_EXPORTS unsigned long long getIppFeatures(); +CV_EXPORTS void setIppStatus(int status, const char * const funcname = NULL, const char * const filename = NULL, int line = 0); -CV_EXPORTS int getIppStatus(); -CV_EXPORTS String getIppErrorLocation(); -CV_EXPORTS bool useIPP(); -CV_EXPORTS void setUseIPP(bool flag); +CV_EXPORTS int getIppStatus(); +CV_EXPORTS String getIppErrorLocation(); +CV_EXPORTS_W bool useIPP(); +CV_EXPORTS_W void setUseIPP(bool flag); +CV_EXPORTS_W String getIppVersion(); + +// IPP Not-Exact mode. This function may force use of IPP then both IPP and OpenCV provide proper results +// but have internal accuracy differences which have too much direct or indirect impact on accuracy tests. +CV_EXPORTS_W bool useIPP_NotExact(); +CV_EXPORTS_W void setUseIPP_NotExact(bool flag); +#ifndef DISABLE_OPENCV_3_COMPATIBILITY +static inline bool useIPP_NE() { return useIPP_NotExact(); } +static inline void setUseIPP_NE(bool flag) { setUseIPP_NotExact(flag); } +#endif } // ipp @@ -685,5 +648,7 @@ CV_EXPORTS void setUseIPP(bool flag); } // cv #include "opencv2/core/neon_utils.hpp" +#include "opencv2/core/vsx_utils.hpp" +#include "opencv2/core/check.hpp" -#endif //__OPENCV_CORE_BASE_HPP__ +#endif //OPENCV_CORE_BASE_HPP diff --git a/IPL/include/opencv/opencv2/core/bindings_utils.hpp b/IPL/include/opencv/opencv2/core/bindings_utils.hpp new file mode 100644 index 0000000..f693dc8 --- /dev/null +++ b/IPL/include/opencv/opencv2/core/bindings_utils.hpp @@ -0,0 +1,87 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_BINDINGS_UTILS_HPP +#define OPENCV_CORE_BINDINGS_UTILS_HPP + +#include +#include + +namespace cv { namespace utils { +//! @addtogroup core_utils +//! @{ + +CV_EXPORTS_W String dumpInputArray(InputArray argument); + +CV_EXPORTS_W String dumpInputArrayOfArrays(InputArrayOfArrays argument); + +CV_EXPORTS_W String dumpInputOutputArray(InputOutputArray argument); + +CV_EXPORTS_W String dumpInputOutputArrayOfArrays(InputOutputArrayOfArrays argument); + +CV_WRAP static inline +String dumpBool(bool argument) +{ + return (argument) ? String("Bool: True") : String("Bool: False"); +} + +CV_WRAP static inline +String dumpInt(int argument) +{ + return cv::format("Int: %d", argument); +} + +CV_WRAP static inline +String dumpSizeT(size_t argument) +{ + std::ostringstream oss("size_t: ", std::ios::ate); + oss << argument; + return oss.str(); +} + +CV_WRAP static inline +String dumpFloat(float argument) +{ + return cv::format("Float: %.2f", argument); +} + +CV_WRAP static inline +String dumpDouble(double argument) +{ + return cv::format("Double: %.2f", argument); +} + +CV_WRAP static inline +String dumpCString(const char* argument) +{ + return cv::format("String: %s", argument); +} + +CV_WRAP static inline +AsyncArray testAsyncArray(InputArray argument) +{ + AsyncPromise p; + p.setValue(argument); + return p.getArrayResult(); +} + +CV_WRAP static inline +AsyncArray testAsyncException() +{ + AsyncPromise p; + try + { + CV_Error(Error::StsOk, "Test: Generated async error"); + } + catch (const cv::Exception& e) + { + p.setException(e); + } + return p.getArrayResult(); +} + +//! @} +}} // namespace + +#endif // OPENCV_CORE_BINDINGS_UTILS_HPP diff --git a/IPL/include/opencv/opencv2/core/bufferpool.hpp b/IPL/include/opencv/opencv2/core/bufferpool.hpp index 76df2d2..4698e5d 100644 --- a/IPL/include/opencv/opencv2/core/bufferpool.hpp +++ b/IPL/include/opencv/opencv2/core/bufferpool.hpp @@ -4,8 +4,13 @@ // // Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved. -#ifndef __OPENCV_CORE_BUFFER_POOL_HPP__ -#define __OPENCV_CORE_BUFFER_POOL_HPP__ +#ifndef OPENCV_CORE_BUFFER_POOL_HPP +#define OPENCV_CORE_BUFFER_POOL_HPP + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4265) +#endif namespace cv { @@ -28,4 +33,8 @@ class BufferPoolController } -#endif // __OPENCV_CORE_BUFFER_POOL_HPP__ +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +#endif // OPENCV_CORE_BUFFER_POOL_HPP diff --git a/IPL/include/opencv/opencv2/core/check.hpp b/IPL/include/opencv/opencv2/core/check.hpp new file mode 100644 index 0000000..0e0c7cb --- /dev/null +++ b/IPL/include/opencv/opencv2/core/check.hpp @@ -0,0 +1,160 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_CHECK_HPP +#define OPENCV_CORE_CHECK_HPP + +#include + +namespace cv { + +/** Returns string of cv::Mat depth value: CV_8U -> "CV_8U" or "" */ +CV_EXPORTS const char* depthToString(int depth); + +/** Returns string of cv::Mat depth value: CV_8UC3 -> "CV_8UC3" or "" */ +CV_EXPORTS const String typeToString(int type); + + +//! @cond IGNORED +namespace detail { + +/** Returns string of cv::Mat depth value: CV_8U -> "CV_8U" or NULL */ +CV_EXPORTS const char* depthToString_(int depth); + +/** Returns string of cv::Mat depth value: CV_8UC3 -> "CV_8UC3" or cv::String() */ +CV_EXPORTS const cv::String typeToString_(int type); + +enum TestOp { + TEST_CUSTOM = 0, + TEST_EQ = 1, + TEST_NE = 2, + TEST_LE = 3, + TEST_LT = 4, + TEST_GE = 5, + TEST_GT = 6, + CV__LAST_TEST_OP +}; + +struct CheckContext { + const char* func; + const char* file; + int line; + enum TestOp testOp; + const char* message; + const char* p1_str; + const char* p2_str; +}; + +#ifndef CV__CHECK_FILENAME +# define CV__CHECK_FILENAME __FILE__ +#endif + +#ifndef CV__CHECK_FUNCTION +# if defined _MSC_VER +# define CV__CHECK_FUNCTION __FUNCSIG__ +# elif defined __GNUC__ +# define CV__CHECK_FUNCTION __PRETTY_FUNCTION__ +# else +# define CV__CHECK_FUNCTION "" +# endif +#endif + +#define CV__CHECK_LOCATION_VARNAME(id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_check_, id), __LINE__) +#define CV__DEFINE_CHECK_CONTEXT(id, message, testOp, p1_str, p2_str) \ + static const cv::detail::CheckContext CV__CHECK_LOCATION_VARNAME(id) = \ + { CV__CHECK_FUNCTION, CV__CHECK_FILENAME, __LINE__, testOp, message, p1_str, p2_str } + +CV_EXPORTS void CV_NORETURN check_failed_auto(const int v1, const int v2, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const size_t v1, const size_t v2, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const float v1, const float v2, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const double v1, const double v2, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const Size_ v1, const Size_ v2, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v1, const int v2, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v1, const int v2, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v1, const int v2, const CheckContext& ctx); + +CV_EXPORTS void CV_NORETURN check_failed_auto(const int v, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const size_t v, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const float v, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const double v, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const Size_ v, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const std::string& v1, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v, const CheckContext& ctx); + + +#define CV__TEST_EQ(v1, v2) ((v1) == (v2)) +#define CV__TEST_NE(v1, v2) ((v1) != (v2)) +#define CV__TEST_LE(v1, v2) ((v1) <= (v2)) +#define CV__TEST_LT(v1, v2) ((v1) < (v2)) +#define CV__TEST_GE(v1, v2) ((v1) >= (v2)) +#define CV__TEST_GT(v1, v2) ((v1) > (v2)) + +#define CV__CHECK(id, op, type, v1, v2, v1_str, v2_str, msg_str) do { \ + if(CV__TEST_##op((v1), (v2))) ; else { \ + CV__DEFINE_CHECK_CONTEXT(id, msg_str, cv::detail::TEST_ ## op, v1_str, v2_str); \ + cv::detail::check_failed_ ## type((v1), (v2), CV__CHECK_LOCATION_VARNAME(id)); \ + } \ +} while (0) + +#define CV__CHECK_CUSTOM_TEST(id, type, v, test_expr, v_str, test_expr_str, msg_str) do { \ + if(!!(test_expr)) ; else { \ + CV__DEFINE_CHECK_CONTEXT(id, msg_str, cv::detail::TEST_CUSTOM, v_str, test_expr_str); \ + cv::detail::check_failed_ ## type((v), CV__CHECK_LOCATION_VARNAME(id)); \ + } \ +} while (0) + +} // namespace +//! @endcond + + +/// Supported values of these types: int, float, double +#define CV_CheckEQ(v1, v2, msg) CV__CHECK(_, EQ, auto, v1, v2, #v1, #v2, msg) +#define CV_CheckNE(v1, v2, msg) CV__CHECK(_, NE, auto, v1, v2, #v1, #v2, msg) +#define CV_CheckLE(v1, v2, msg) CV__CHECK(_, LE, auto, v1, v2, #v1, #v2, msg) +#define CV_CheckLT(v1, v2, msg) CV__CHECK(_, LT, auto, v1, v2, #v1, #v2, msg) +#define CV_CheckGE(v1, v2, msg) CV__CHECK(_, GE, auto, v1, v2, #v1, #v2, msg) +#define CV_CheckGT(v1, v2, msg) CV__CHECK(_, GT, auto, v1, v2, #v1, #v2, msg) + +/// Check with additional "decoding" of type values in error message +#define CV_CheckTypeEQ(t1, t2, msg) CV__CHECK(_, EQ, MatType, t1, t2, #t1, #t2, msg) +/// Check with additional "decoding" of depth values in error message +#define CV_CheckDepthEQ(d1, d2, msg) CV__CHECK(_, EQ, MatDepth, d1, d2, #d1, #d2, msg) + +#define CV_CheckChannelsEQ(c1, c2, msg) CV__CHECK(_, EQ, MatChannels, c1, c2, #c1, #c2, msg) + +/// Example: type == CV_8UC1 || type == CV_8UC3 +#define CV_CheckType(t, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, MatType, t, (test_expr), #t, #test_expr, msg) + +/// Example: depth == CV_32F || depth == CV_64F +#define CV_CheckDepth(t, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, MatDepth, t, (test_expr), #t, #test_expr, msg) + +/// Example: v == A || v == B +#define CV_Check(v, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, auto, v, (test_expr), #v, #test_expr, msg) + +/// Some complex conditions: CV_Check(src2, src2.empty() || (src2.type() == src1.type() && src2.size() == src1.size()), "src2 should have same size/type as src1") +// TODO define pretty-printers + +#ifndef NDEBUG +#define CV_DbgCheck(v, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, auto, v, (test_expr), #v, #test_expr, msg) +#define CV_DbgCheckEQ(v1, v2, msg) CV__CHECK(_, EQ, auto, v1, v2, #v1, #v2, msg) +#define CV_DbgCheckNE(v1, v2, msg) CV__CHECK(_, NE, auto, v1, v2, #v1, #v2, msg) +#define CV_DbgCheckLE(v1, v2, msg) CV__CHECK(_, LE, auto, v1, v2, #v1, #v2, msg) +#define CV_DbgCheckLT(v1, v2, msg) CV__CHECK(_, LT, auto, v1, v2, #v1, #v2, msg) +#define CV_DbgCheckGE(v1, v2, msg) CV__CHECK(_, GE, auto, v1, v2, #v1, #v2, msg) +#define CV_DbgCheckGT(v1, v2, msg) CV__CHECK(_, GT, auto, v1, v2, #v1, #v2, msg) +#else +#define CV_DbgCheck(v, test_expr, msg) do { } while (0) +#define CV_DbgCheckEQ(v1, v2, msg) do { } while (0) +#define CV_DbgCheckNE(v1, v2, msg) do { } while (0) +#define CV_DbgCheckLE(v1, v2, msg) do { } while (0) +#define CV_DbgCheckLT(v1, v2, msg) do { } while (0) +#define CV_DbgCheckGE(v1, v2, msg) do { } while (0) +#define CV_DbgCheckGT(v1, v2, msg) do { } while (0) +#endif + +} // namespace + +#endif // OPENCV_CORE_CHECK_HPP diff --git a/IPL/include/opencv/opencv2/core/core_c.h b/IPL/include/opencv/opencv2/core/core_c.h index a0ed632..5dd1a8f 100644 --- a/IPL/include/opencv/opencv2/core/core_c.h +++ b/IPL/include/opencv/opencv2/core/core_c.h @@ -42,8 +42,8 @@ //M*/ -#ifndef __OPENCV_CORE_C_H__ -#define __OPENCV_CORE_C_H__ +#ifndef OPENCV_CORE_C_H +#define OPENCV_CORE_C_H #include "opencv2/core/types_c.h" @@ -53,7 +53,7 @@ which is incompatible with C It is OK to disable it because we only extend few plain structures with - C++ construrtors for simpler interoperability with C++ API of the library + C++ constructors for simpler interoperability with C++ API of the library */ # pragma warning(disable:4190) # elif defined __clang__ && __clang_major__ >= 3 @@ -359,7 +359,7 @@ CVAPI(CvMat*) cvGetSubRect( const CvArr* arr, CvMat* submat, CvRect rect ); /** @brief Returns array row or row span. -The functions return the header, corresponding to a specified row/row span of the input array. +The function returns the header, corresponding to a specified row/row span of the input array. cvGetRow(arr, submat, row) is a shortcut for cvGetRows(arr, submat, row, row+1). @param arr Input array @param submat Pointer to the resulting sub-array header @@ -385,7 +385,7 @@ CV_INLINE CvMat* cvGetRow( const CvArr* arr, CvMat* submat, int row ) /** @brief Returns one of more array columns. -The functions return the header, corresponding to a specified column span of the input array. That +The function returns the header, corresponding to a specified column span of the input array. That is, no data is copied. Therefore, any modifications of the submatrix will affect the original array. If you need to copy the columns, use cvCloneMat. cvGetCol(arr, submat, col) is a shortcut for @@ -579,7 +579,7 @@ CvNArrayIterator; #define CV_NO_CN_CHECK 2 #define CV_NO_SIZE_CHECK 4 -/** initializes iterator that traverses through several arrays simulteneously +/** initializes iterator that traverses through several arrays simultaneously (the function together with cvNextArraySlice is used for N-ari element-wise operations) */ CVAPI(int) cvInitNArrayIterator( int count, CvArr** arrs, @@ -1309,7 +1309,7 @@ CVAPI(void) cvMulTransposed( const CvArr* src, CvArr* dst, int order, const CvArr* delta CV_DEFAULT(NULL), double scale CV_DEFAULT(1.) ); -/** Tranposes matrix. Square matrices can be transposed in-place */ +/** Transposes matrix. Square matrices can be transposed in-place */ CVAPI(void) cvTranspose( const CvArr* src, CvArr* dst ); #define cvT cvTranspose @@ -1576,8 +1576,8 @@ CVAPI(void) cvRestoreMemStoragePos( CvMemStorage* storage, CvMemStoragePos* pos CVAPI(void*) cvMemStorageAlloc( CvMemStorage* storage, size_t size ); /** Allocates string in memory storage */ -CVAPI(CvString) cvMemStorageAllocString( CvMemStorage* storage, const char* ptr, - int len CV_DEFAULT(-1) ); +//CVAPI(CvString) cvMemStorageAllocString( CvMemStorage* storage, const char* ptr, +// int len CV_DEFAULT(-1) ); /** Creates new empty sequence that will reside in the specified storage */ CVAPI(CvSeq*) cvCreateSeq( int seq_flags, size_t header_size, @@ -1788,7 +1788,7 @@ CVAPI(int) cvGraphRemoveVtx( CvGraph* graph, int index ); CVAPI(int) cvGraphRemoveVtxByPtr( CvGraph* graph, CvGraphVtx* vtx ); -/** Link two vertices specifed by indices or pointers if they +/** Link two vertices specified by indices or pointers if they are not connected or return pointer to already existing edge connecting the vertices. Functions return 1 if a new edge was created, 0 otherwise */ @@ -1970,14 +1970,19 @@ CVAPI(void) cvSetIPLAllocators( Cv_iplCreateImageHeader create_header, * Data Persistence * \****************************************************************************************/ +#if 0 /********************************** High-level functions ********************************/ /** @brief Opens file storage for reading or writing data. The function opens file storage for reading or writing data. In the latter case, a new file is created or an existing file is rewritten. The type of the read or written file is determined by the -filename extension: .xml for XML and .yml or .yaml for YAML. The function returns a pointer to the -CvFileStorage structure. If the file cannot be opened then the function returns NULL. +filename extension: .xml for XML, .yml or .yaml for YAML and .json for JSON. + +At the same time, it also supports adding parameters like "example.xml?base64". + +The function returns a pointer to the CvFileStorage structure. +If the file cannot be opened then the function returns NULL. @param filename Name of the file associated with the storage @param memstorage Memory storage used for temporary data and for : storing dynamic structures, such as CvSeq or CvGraph . If it is NULL, a temporary memory @@ -1985,6 +1990,7 @@ CvFileStorage structure. If the file cannot be opened then the function returns @param flags Can be one of the following: > - **CV_STORAGE_READ** the storage is open for reading > - **CV_STORAGE_WRITE** the storage is open for writing + (use **CV_STORAGE_WRITE | CV_STORAGE_WRITE_BASE64** to write rawdata in Base64) @param encoding */ CVAPI(CvFileStorage*) cvOpenFileStorage( const char* filename, CvMemStorage* memstorage, @@ -2022,7 +2028,8 @@ One and only one of the two above flags must be specified @param type_name Optional parameter - the object type name. In case of XML it is written as a type_id attribute of the structure opening tag. In the case of YAML it is written after a colon following the structure name (see the example in - CvFileStorage description). Mainly it is used with user objects. When the storage is read, the + CvFileStorage description). In case of JSON it is written as a name/value pair. + Mainly it is used with user objects. When the storage is read, the encoded type name is used to determine the object type (see CvTypeInfo and cvFindType ). @param attributes This parameter is not used in the current implementation */ @@ -2059,9 +2066,9 @@ such as termination criteria, without registering a new type. : { cvStartWriteStruct( fs, struct_name, CV_NODE_MAP, NULL, cvAttrList(0,0)); cvWriteComment( fs, "termination criteria", 1 ); // just a description - if( termcrit->type & CV_TERMCRIT_ITER ) + if( termcrit->type & cv::TermCriteria::MAX_ITER ) cvWriteInteger( fs, "max_iterations", termcrit->max_iter ); - if( termcrit->type & CV_TERMCRIT_EPS ) + if( termcrit->type & cv::TermCriteria::EPS ) cvWriteReal( fs, "accuracy", termcrit->epsilon ); cvEndWriteStruct( fs ); } @@ -2162,7 +2169,7 @@ the file with multiple streams looks like this: @endcode The YAML file will look like this: @code{.yaml} - %YAML:1.0 + %YAML 1.0 # stream #1 data ... --- @@ -2187,6 +2194,23 @@ to a sequence rather than a map. CVAPI(void) cvWriteRawData( CvFileStorage* fs, const void* src, int len, const char* dt ); +/** @brief Writes multiple numbers in Base64. + +If either CV_STORAGE_WRITE_BASE64 or cv::FileStorage::WRITE_BASE64 is used, +this function will be the same as cvWriteRawData. If neither, the main +difference is that it outputs a sequence in Base64 encoding rather than +in plain text. + +This function can only be used to write a sequence with a type "binary". + +@param fs File storage +@param src Pointer to the written array +@param len Number of the array elements to write +@param dt Specification of each array element, see @ref format_spec "format specification" +*/ +CVAPI(void) cvWriteRawDataBase64( CvFileStorage* fs, const void* src, + int len, const char* dt ); + /** @brief Returns a unique pointer for a given name. The function returns a unique pointer for each particular file node name. This pointer can be then @@ -2468,7 +2492,7 @@ CVAPI(void) cvReadRawData( const CvFileStorage* fs, const CvFileNode* src, /** @brief Writes a file node to another file storage. The function writes a copy of a file node to file storage. Possible applications of the function are -merging several file storages into one and conversion between XML and YAML formats. +merging several file storages into one and conversion between XML, YAML and JSON formats. @param fs Destination file storage @param new_node_name New name of the file node in the destination file storage. To keep the existing name, use cvcvGetFileNodeName @@ -2533,10 +2557,12 @@ returns NULL. */ CVAPI(CvTypeInfo*) cvTypeOf( const void* struct_ptr ); +#endif + /** @brief Releases an object. -The function finds the type of a given object and calls release with the double pointer. -@param struct_ptr Double pointer to the object + The function finds the type of a given object and calls release with the double pointer. + @param struct_ptr Double pointer to the object */ CVAPI(void) cvRelease( void** struct_ptr ); @@ -2549,41 +2575,6 @@ function, like cvCloneMat. */ CVAPI(void*) cvClone( const void* struct_ptr ); -/** @brief Saves an object to a file. - -The function saves an object to a file. It provides a simple interface to cvWrite . -@param filename File name -@param struct_ptr Object to save -@param name Optional object name. If it is NULL, the name will be formed from filename . -@param comment Optional comment to put in the beginning of the file -@param attributes Optional attributes passed to cvWrite - */ -CVAPI(void) cvSave( const char* filename, const void* struct_ptr, - const char* name CV_DEFAULT(NULL), - const char* comment CV_DEFAULT(NULL), - CvAttrList attributes CV_DEFAULT(cvAttrList())); - -/** @brief Loads an object from a file. - -The function loads an object from a file. It basically reads the specified file, find the first -top-level node and calls cvRead for that node. If the file node does not have type information or -the type information can not be found by the type name, the function returns NULL. After the object -is loaded, the file storage is closed and all the temporary buffers are deleted. Thus, to load a -dynamic structure, such as a sequence, contour, or graph, one should pass a valid memory storage -destination to the function. -@param filename File name -@param memstorage Memory storage for dynamic structures, such as CvSeq or CvGraph . It is not used -for matrices or images. -@param name Optional object name. If it is NULL, the first top-level object in the storage will be -loaded. -@param real_name Optional output parameter that will contain the name of the loaded object -(useful if name=NULL ) - */ -CVAPI(void*) cvLoad( const char* filename, - CvMemStorage* memstorage CV_DEFAULT(NULL), - const char* name CV_DEFAULT(NULL), - const char** real_name CV_DEFAULT(NULL) ); - /*********************************** Measuring Execution Time ***************************/ /** helper functions for RNG initialization and accurate time measurement: @@ -2616,13 +2607,13 @@ CVAPI(void) cvSetErrStatus( int status ); #define CV_ErrModeParent 1 /* Print error and continue */ #define CV_ErrModeSilent 2 /* Don't print and continue */ -/** Retrives current error processing mode */ +/** Retrieves current error processing mode */ CVAPI(int) cvGetErrMode( void ); /** Sets error processing mode, returns previously used mode */ CVAPI(int) cvSetErrMode( int mode ); -/** Sets error status and performs some additonal actions (displaying message box, +/** Sets error status and performs some additional actions (displaying message box, writing message to stderr, terminating application etc.) depending on the current error mode */ CVAPI(void) cvError( int status, const char* func_name, @@ -2631,7 +2622,7 @@ CVAPI(void) cvError( int status, const char* func_name, /** Retrieves textual description of the error given its code */ CVAPI(const char*) cvErrorStr( int status ); -/** Retrieves detailed information about the last error occured */ +/** Retrieves detailed information about the last error occurred */ CVAPI(int) cvGetErrInfo( const char** errcode_desc, const char** description, const char** filename, int* line ); @@ -2706,7 +2697,7 @@ static char cvFuncName[] = Name /** CV_CALL macro calls CV (or IPL) function, checks error status and signals a error if the function failed. Useful in "parent node" - error procesing mode + error processing mode */ #define CV_CALL( Func ) \ { \ @@ -2734,24 +2725,6 @@ static char cvFuncName[] = Name #ifdef __cplusplus -//! @addtogroup core_c_glue -//! @{ - -//! class for automatic module/RTTI data registration/unregistration -struct CV_EXPORTS CvType -{ - CvType( const char* type_name, - CvIsInstanceFunc is_instance, CvReleaseFunc release=0, - CvReadFunc read=0, CvWriteFunc write=0, CvCloneFunc clone=0 ); - ~CvType(); - CvTypeInfo* info; - - static CvTypeInfo* first; - static CvTypeInfo* last; -}; - -//! @} - #include "opencv2/core/utility.hpp" namespace cv @@ -2782,11 +2755,11 @@ CV_EXPORTS void insertImageCOI(InputArray coiimg, CvArr* arr, int coi=-1); ////// specialized implementations of DefaultDeleter::operator() for classic OpenCV types ////// -template<> CV_EXPORTS void DefaultDeleter::operator ()(CvMat* obj) const; -template<> CV_EXPORTS void DefaultDeleter::operator ()(IplImage* obj) const; -template<> CV_EXPORTS void DefaultDeleter::operator ()(CvMatND* obj) const; -template<> CV_EXPORTS void DefaultDeleter::operator ()(CvSparseMat* obj) const; -template<> CV_EXPORTS void DefaultDeleter::operator ()(CvMemStorage* obj) const; +template<> struct DefaultDeleter{ CV_EXPORTS void operator ()(CvMat* obj) const; }; +template<> struct DefaultDeleter{ CV_EXPORTS void operator ()(IplImage* obj) const; }; +template<> struct DefaultDeleter{ CV_EXPORTS void operator ()(CvMatND* obj) const; }; +template<> struct DefaultDeleter{ CV_EXPORTS void operator ()(CvSparseMat* obj) const; }; +template<> struct DefaultDeleter{ CV_EXPORTS void operator ()(CvMemStorage* obj) const; }; ////////////// convenient wrappers for operating old-style dynamic structures ////////////// @@ -3041,7 +3014,7 @@ template inline void Seq<_Tp>::copyTo(std::vector<_Tp>& vec, const size_t len = !seq ? 0 : range == Range::all() ? seq->total : range.end - range.start; vec.resize(len); if( seq && len ) - cvCvtSeqToArray(seq, &vec[0], range); + cvCvtSeqToArray(seq, &vec[0], cvSlice(range)); } template inline Seq<_Tp>::operator std::vector<_Tp>() const diff --git a/IPL/include/opencv/opencv2/core/cuda.hpp b/IPL/include/opencv/opencv2/core/cuda.hpp index 64bc53e..5d94b72 100644 --- a/IPL/include/opencv/opencv2/core/cuda.hpp +++ b/IPL/include/opencv/opencv2/core/cuda.hpp @@ -41,8 +41,8 @@ // //M*/ -#ifndef __OPENCV_CORE_CUDA_HPP__ -#define __OPENCV_CORE_CUDA_HPP__ +#ifndef OPENCV_CORE_CUDA_HPP +#define OPENCV_CORE_CUDA_HPP #ifndef __cplusplus # error cuda.hpp header must be compiled as C++ @@ -56,7 +56,7 @@ @{ @defgroup cudacore Core part @{ - @defgroup cudacore_init Initalization and Information + @defgroup cudacore_init Initialization and Information @defgroup cudacore_struct Data Structures @} @} @@ -91,12 +91,21 @@ aligned to a size depending on the hardware. Single-row GpuMat is always a conti on its destructor. The destruction order of such variables and CUDA context is undefined. GPU memory release function returns error if the CUDA context has been destroyed before. +Some member functions are described as a "Blocking Call" while some are described as a +"Non-Blocking Call". Blocking functions are synchronous to host. It is guaranteed that the GPU +operation is finished when the function returns. However, non-blocking functions are asynchronous to +host. Those functions may return even if the GPU operation is not finished. + +Compared to their blocking counterpart, non-blocking functions accept Stream as an additional +argument. If a non-default stream is passed, the GPU operation may overlap with operations in other +streams. + @sa Mat */ -class CV_EXPORTS GpuMat +class CV_EXPORTS_W GpuMat { public: - class CV_EXPORTS Allocator + class CV_EXPORTS_W Allocator { public: virtual ~Allocator() {} @@ -107,33 +116,33 @@ class CV_EXPORTS GpuMat }; //! default allocator - static Allocator* defaultAllocator(); - static void setDefaultAllocator(Allocator* allocator); + CV_WRAP static GpuMat::Allocator* defaultAllocator(); + CV_WRAP static void setDefaultAllocator(GpuMat::Allocator* allocator); //! default constructor - explicit GpuMat(Allocator* allocator = defaultAllocator()); + CV_WRAP explicit GpuMat(GpuMat::Allocator* allocator = GpuMat::defaultAllocator()); //! constructs GpuMat of the specified size and type - GpuMat(int rows, int cols, int type, Allocator* allocator = defaultAllocator()); - GpuMat(Size size, int type, Allocator* allocator = defaultAllocator()); + CV_WRAP GpuMat(int rows, int cols, int type, GpuMat::Allocator* allocator = GpuMat::defaultAllocator()); + CV_WRAP GpuMat(Size size, int type, GpuMat::Allocator* allocator = GpuMat::defaultAllocator()); - //! constucts GpuMat and fills it with the specified value _s - GpuMat(int rows, int cols, int type, Scalar s, Allocator* allocator = defaultAllocator()); - GpuMat(Size size, int type, Scalar s, Allocator* allocator = defaultAllocator()); + //! constructs GpuMat and fills it with the specified value _s + CV_WRAP GpuMat(int rows, int cols, int type, Scalar s, GpuMat::Allocator* allocator = GpuMat::defaultAllocator()); + CV_WRAP GpuMat(Size size, int type, Scalar s, GpuMat::Allocator* allocator = GpuMat::defaultAllocator()); //! copy constructor - GpuMat(const GpuMat& m); + CV_WRAP GpuMat(const GpuMat& m); //! constructor for GpuMat headers pointing to user-allocated data GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP); GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP); //! creates a GpuMat header for a part of the bigger matrix - GpuMat(const GpuMat& m, Range rowRange, Range colRange); - GpuMat(const GpuMat& m, Rect roi); + CV_WRAP GpuMat(const GpuMat& m, Range rowRange, Range colRange); + CV_WRAP GpuMat(const GpuMat& m, Rect roi); //! builds GpuMat from host memory (Blocking call) - explicit GpuMat(InputArray arr, Allocator* allocator = defaultAllocator()); + CV_WRAP explicit GpuMat(InputArray arr, GpuMat::Allocator* allocator = GpuMat::defaultAllocator()); //! destructor - calls release() ~GpuMat(); @@ -142,70 +151,92 @@ class CV_EXPORTS GpuMat GpuMat& operator =(const GpuMat& m); //! allocates new GpuMat data unless the GpuMat already has specified size and type - void create(int rows, int cols, int type); - void create(Size size, int type); + CV_WRAP void create(int rows, int cols, int type); + CV_WRAP void create(Size size, int type); //! decreases reference counter, deallocate the data when reference counter reaches 0 void release(); //! swaps with other smart pointer - void swap(GpuMat& mat); + CV_WRAP void swap(GpuMat& mat); + + /** @brief Performs data upload to GpuMat (Blocking call) + + This function copies data from host memory to device memory. As being a blocking call, it is + guaranteed that the copy operation is finished when this function returns. + */ + CV_WRAP void upload(InputArray arr); + + /** @brief Performs data upload to GpuMat (Non-Blocking call) + + This function copies data from host memory to device memory. As being a non-blocking call, this + function may return even if the copy operation is not finished. - //! pefroms upload data to GpuMat (Blocking call) - void upload(InputArray arr); + The copy operation may be overlapped with operations in other non-default streams if \p stream is + not the default stream and \p dst is HostMem allocated with HostMem::PAGE_LOCKED option. + */ + CV_WRAP void upload(InputArray arr, Stream& stream); + + /** @brief Performs data download from GpuMat (Blocking call) + + This function copies data from device memory to host memory. As being a blocking call, it is + guaranteed that the copy operation is finished when this function returns. + */ + CV_WRAP void download(OutputArray dst) const; - //! pefroms upload data to GpuMat (Non-Blocking call) - void upload(InputArray arr, Stream& stream); + /** @brief Performs data download from GpuMat (Non-Blocking call) - //! pefroms download data from device to host memory (Blocking call) - void download(OutputArray dst) const; + This function copies data from device memory to host memory. As being a non-blocking call, this + function may return even if the copy operation is not finished. - //! pefroms download data from device to host memory (Non-Blocking call) - void download(OutputArray dst, Stream& stream) const; + The copy operation may be overlapped with operations in other non-default streams if \p stream is + not the default stream and \p dst is HostMem allocated with HostMem::PAGE_LOCKED option. + */ + CV_WRAP void download(OutputArray dst, Stream& stream) const; //! returns deep copy of the GpuMat, i.e. the data is copied - GpuMat clone() const; + CV_WRAP GpuMat clone() const; //! copies the GpuMat content to device memory (Blocking call) - void copyTo(OutputArray dst) const; + CV_WRAP void copyTo(OutputArray dst) const; //! copies the GpuMat content to device memory (Non-Blocking call) - void copyTo(OutputArray dst, Stream& stream) const; + CV_WRAP void copyTo(OutputArray dst, Stream& stream) const; //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Blocking call) - void copyTo(OutputArray dst, InputArray mask) const; + CV_WRAP void copyTo(OutputArray dst, InputArray mask) const; //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Non-Blocking call) - void copyTo(OutputArray dst, InputArray mask, Stream& stream) const; + CV_WRAP void copyTo(OutputArray dst, InputArray mask, Stream& stream) const; //! sets some of the GpuMat elements to s (Blocking call) - GpuMat& setTo(Scalar s); + CV_WRAP GpuMat& setTo(Scalar s); //! sets some of the GpuMat elements to s (Non-Blocking call) - GpuMat& setTo(Scalar s, Stream& stream); + CV_WRAP GpuMat& setTo(Scalar s, Stream& stream); //! sets some of the GpuMat elements to s, according to the mask (Blocking call) - GpuMat& setTo(Scalar s, InputArray mask); + CV_WRAP GpuMat& setTo(Scalar s, InputArray mask); //! sets some of the GpuMat elements to s, according to the mask (Non-Blocking call) - GpuMat& setTo(Scalar s, InputArray mask, Stream& stream); + CV_WRAP GpuMat& setTo(Scalar s, InputArray mask, Stream& stream); //! converts GpuMat to another datatype (Blocking call) - void convertTo(OutputArray dst, int rtype) const; + CV_WRAP void convertTo(OutputArray dst, int rtype) const; //! converts GpuMat to another datatype (Non-Blocking call) - void convertTo(OutputArray dst, int rtype, Stream& stream) const; + CV_WRAP void convertTo(OutputArray dst, int rtype, Stream& stream) const; //! converts GpuMat to another datatype with scaling (Blocking call) - void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const; + CV_WRAP void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const; //! converts GpuMat to another datatype with scaling (Non-Blocking call) - void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const; + CV_WRAP void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const; //! converts GpuMat to another datatype with scaling (Non-Blocking call) - void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const; + CV_WRAP void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const; - void assignTo(GpuMat& m, int type=-1) const; + CV_WRAP void assignTo(GpuMat& m, int type = -1) const; //! returns pointer to y-th row uchar* ptr(int y = 0); @@ -219,18 +250,18 @@ class CV_EXPORTS GpuMat template operator PtrStep<_Tp>() const; //! returns a new GpuMat header for the specified row - GpuMat row(int y) const; + CV_WRAP GpuMat row(int y) const; //! returns a new GpuMat header for the specified column - GpuMat col(int x) const; + CV_WRAP GpuMat col(int x) const; //! ... for the specified row span - GpuMat rowRange(int startrow, int endrow) const; - GpuMat rowRange(Range r) const; + CV_WRAP GpuMat rowRange(int startrow, int endrow) const; + CV_WRAP GpuMat rowRange(Range r) const; //! ... for the specified column span - GpuMat colRange(int startcol, int endcol) const; - GpuMat colRange(Range r) const; + CV_WRAP GpuMat colRange(int startcol, int endcol) const; + CV_WRAP GpuMat colRange(Range r) const; //! extracts a rectangular sub-GpuMat (this is a generalized form of row, rowRange etc.) GpuMat operator ()(Range rowRange, Range colRange) const; @@ -238,41 +269,47 @@ class CV_EXPORTS GpuMat //! creates alternative GpuMat header for the same data, with different //! number of channels and/or different number of rows - GpuMat reshape(int cn, int rows = 0) const; + CV_WRAP GpuMat reshape(int cn, int rows = 0) const; //! locates GpuMat header within a parent GpuMat - void locateROI(Size& wholeSize, Point& ofs) const; + CV_WRAP void locateROI(Size& wholeSize, Point& ofs) const; //! moves/resizes the current GpuMat ROI inside the parent GpuMat - GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright); + CV_WRAP GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright); //! returns true iff the GpuMat data is continuous //! (i.e. when there are no gaps between successive rows) - bool isContinuous() const; + CV_WRAP bool isContinuous() const; //! returns element size in bytes - size_t elemSize() const; + CV_WRAP size_t elemSize() const; //! returns the size of element channel in bytes - size_t elemSize1() const; + CV_WRAP size_t elemSize1() const; //! returns element type - int type() const; + CV_WRAP int type() const; //! returns element type - int depth() const; + CV_WRAP int depth() const; //! returns number of channels - int channels() const; + CV_WRAP int channels() const; //! returns step/elemSize1() - size_t step1() const; + CV_WRAP size_t step1() const; //! returns GpuMat size : width == number of columns, height == number of rows - Size size() const; + CV_WRAP Size size() const; //! returns true if GpuMat data is NULL - bool empty() const; + CV_WRAP bool empty() const; + + // returns pointer to cuda memory + CV_WRAP void* cudaPtr() const; + + //! internal use method: updates the continuity flag + CV_WRAP void updateContinuityFlag(); /*! includes several bit-fields: - the magic signature @@ -286,7 +323,7 @@ class CV_EXPORTS GpuMat int rows, cols; //! a distance between successive rows in bytes; includes the gap if any - size_t step; + CV_PROP size_t step; //! pointer to the data uchar* data; @@ -314,7 +351,7 @@ class CV_EXPORTS GpuMat Matrix is called continuous if its elements are stored continuously, that is, without gaps at the end of each row. */ -CV_EXPORTS void createContinuous(int rows, int cols, int type, OutputArray arr); +CV_EXPORTS_W void createContinuous(int rows, int cols, int type, OutputArray arr); /** @brief Ensures that the size of a matrix is big enough and the matrix has a proper type. @@ -325,11 +362,148 @@ CV_EXPORTS void createContinuous(int rows, int cols, int type, OutputArray arr); The function does not reallocate memory if the matrix has proper attributes already. */ -CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr); +CV_EXPORTS_W void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr); + +/** @brief BufferPool for use with CUDA streams + +BufferPool utilizes Stream's allocator to create new buffers for GpuMat's. It is +only useful when enabled with #setBufferPoolUsage. + +@code + setBufferPoolUsage(true); +@endcode + +@note #setBufferPoolUsage must be called \em before any Stream declaration. + +Users may specify custom allocator for Stream and may implement their own stream based +functions utilizing the same underlying GPU memory management. + +If custom allocator is not specified, BufferPool utilizes StackAllocator by +default. StackAllocator allocates a chunk of GPU device memory beforehand, +and when GpuMat is declared later on, it is given the pre-allocated memory. +This kind of strategy reduces the number of calls for memory allocating APIs +such as cudaMalloc or cudaMallocPitch. + +Below is an example that utilizes BufferPool with StackAllocator: + +@code + #include + + using namespace cv; + using namespace cv::cuda + + int main() + { + setBufferPoolUsage(true); // Tell OpenCV that we are going to utilize BufferPool + setBufferPoolConfig(getDevice(), 1024 * 1024 * 64, 2); // Allocate 64 MB, 2 stacks (default is 10 MB, 5 stacks) + + Stream stream1, stream2; // Each stream uses 1 stack + BufferPool pool1(stream1), pool2(stream2); + + GpuMat d_src1 = pool1.getBuffer(4096, 4096, CV_8UC1); // 16MB + GpuMat d_dst1 = pool1.getBuffer(4096, 4096, CV_8UC3); // 48MB, pool1 is now full + + GpuMat d_src2 = pool2.getBuffer(1024, 1024, CV_8UC1); // 1MB + GpuMat d_dst2 = pool2.getBuffer(1024, 1024, CV_8UC3); // 3MB + + cvtColor(d_src1, d_dst1, cv::COLOR_GRAY2BGR, 0, stream1); + cvtColor(d_src2, d_dst2, cv::COLOR_GRAY2BGR, 0, stream2); + } +@endcode + +If we allocate another GpuMat on pool1 in the above example, it will be carried out by +the DefaultAllocator since the stack for pool1 is full. + +@code + GpuMat d_add1 = pool1.getBuffer(1024, 1024, CV_8UC1); // Stack for pool1 is full, memory is allocated with DefaultAllocator +@endcode + +If a third stream is declared in the above example, allocating with #getBuffer +within that stream will also be carried out by the DefaultAllocator because we've run out of +stacks. + +@code + Stream stream3; // Only 2 stacks were allocated, we've run out of stacks + BufferPool pool3(stream3); + GpuMat d_src3 = pool3.getBuffer(1024, 1024, CV_8UC1); // Memory is allocated with DefaultAllocator +@endcode + +@warning When utilizing StackAllocator, deallocation order is important. + +Just like a stack, deallocation must be done in LIFO order. Below is an example of +erroneous usage that violates LIFO rule. If OpenCV is compiled in Debug mode, this +sample code will emit CV_Assert error. + +@code + int main() + { + setBufferPoolUsage(true); // Tell OpenCV that we are going to utilize BufferPool + Stream stream; // A default size (10 MB) stack is allocated to this stream + BufferPool pool(stream); + + GpuMat mat1 = pool.getBuffer(1024, 1024, CV_8UC1); // Allocate mat1 (1MB) + GpuMat mat2 = pool.getBuffer(1024, 1024, CV_8UC1); // Allocate mat2 (1MB) + + mat1.release(); // erroneous usage : mat2 must be deallocated before mat1 + } +@endcode + +Since C++ local variables are destroyed in the reverse order of construction, +the code sample below satisfies the LIFO rule. Local GpuMat's are deallocated +and the corresponding memory is automatically returned to the pool for later usage. + +@code + int main() + { + setBufferPoolUsage(true); // Tell OpenCV that we are going to utilize BufferPool + setBufferPoolConfig(getDevice(), 1024 * 1024 * 64, 2); // Allocate 64 MB, 2 stacks (default is 10 MB, 5 stacks) + + Stream stream1, stream2; // Each stream uses 1 stack + BufferPool pool1(stream1), pool2(stream2); + + for (int i = 0; i < 10; i++) + { + GpuMat d_src1 = pool1.getBuffer(4096, 4096, CV_8UC1); // 16MB + GpuMat d_dst1 = pool1.getBuffer(4096, 4096, CV_8UC3); // 48MB, pool1 is now full + + GpuMat d_src2 = pool2.getBuffer(1024, 1024, CV_8UC1); // 1MB + GpuMat d_dst2 = pool2.getBuffer(1024, 1024, CV_8UC3); // 3MB + + d_src1.setTo(Scalar(i), stream1); + d_src2.setTo(Scalar(i), stream2); + + cvtColor(d_src1, d_dst1, cv::COLOR_GRAY2BGR, 0, stream1); + cvtColor(d_src2, d_dst2, cv::COLOR_GRAY2BGR, 0, stream2); + // The order of destruction of the local variables is: + // d_dst2 => d_src2 => d_dst1 => d_src1 + // LIFO rule is satisfied, this code runs without error + } + } +@endcode + */ +class CV_EXPORTS_W BufferPool +{ +public: + + //! Gets the BufferPool for the given stream. + explicit BufferPool(Stream& stream); + + //! Allocates a new GpuMat of given size and type. + CV_WRAP GpuMat getBuffer(int rows, int cols, int type); + + //! Allocates a new GpuMat of given size and type. + CV_WRAP GpuMat getBuffer(Size size, int type) { return getBuffer(size.height, size.width, type); } + + //! Returns the allocator associated with the stream. + CV_WRAP Ptr getAllocator() const { return allocator_; } + +private: + Ptr allocator_; +}; //! BufferPool management (must be called before Stream creation) -CV_EXPORTS void setBufferPoolUsage(bool on); -CV_EXPORTS void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount); +CV_EXPORTS_W void setBufferPoolUsage(bool on); +CV_EXPORTS_W void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount); //=================================================================================== // HostMem @@ -350,46 +524,46 @@ Its interface is also Mat-like but with additional memory type parameters. @note Allocation size of such memory types is usually limited. For more details, see *CUDA 2.2 Pinned Memory APIs* document or *CUDA C Programming Guide*. */ -class CV_EXPORTS HostMem +class CV_EXPORTS_W HostMem { public: enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 }; - static MatAllocator* getAllocator(AllocType alloc_type = PAGE_LOCKED); + static MatAllocator* getAllocator(HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED); - explicit HostMem(AllocType alloc_type = PAGE_LOCKED); + CV_WRAP explicit HostMem(HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED); HostMem(const HostMem& m); - HostMem(int rows, int cols, int type, AllocType alloc_type = PAGE_LOCKED); - HostMem(Size size, int type, AllocType alloc_type = PAGE_LOCKED); + CV_WRAP HostMem(int rows, int cols, int type, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED); + CV_WRAP HostMem(Size size, int type, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED); //! creates from host memory with coping data - explicit HostMem(InputArray arr, AllocType alloc_type = PAGE_LOCKED); + CV_WRAP explicit HostMem(InputArray arr, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED); ~HostMem(); HostMem& operator =(const HostMem& m); //! swaps with other smart pointer - void swap(HostMem& b); + CV_WRAP void swap(HostMem& b); //! returns deep copy of the matrix, i.e. the data is copied - HostMem clone() const; + CV_WRAP HostMem clone() const; //! allocates new matrix data unless the matrix already has specified size and type. - void create(int rows, int cols, int type); + CV_WRAP void create(int rows, int cols, int type); void create(Size size, int type); //! creates alternative HostMem header for the same data, with different //! number of channels and/or different number of rows - HostMem reshape(int cn, int rows = 0) const; + CV_WRAP HostMem reshape(int cn, int rows = 0) const; //! decrements reference counter and released memory if needed. void release(); //! returns matrix header with disabled reference counting for HostMem data. - Mat createMatHeader() const; + CV_WRAP Mat createMatHeader() const; /** @brief Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting for it. @@ -401,20 +575,20 @@ class CV_EXPORTS HostMem GpuMat createGpuMatHeader() const; // Please see cv::Mat for descriptions - bool isContinuous() const; - size_t elemSize() const; - size_t elemSize1() const; - int type() const; - int depth() const; - int channels() const; - size_t step1() const; - Size size() const; - bool empty() const; + CV_WRAP bool isContinuous() const; + CV_WRAP size_t elemSize() const; + CV_WRAP size_t elemSize1() const; + CV_WRAP int type() const; + CV_WRAP int depth() const; + CV_WRAP int channels() const; + CV_WRAP size_t step1() const; + CV_WRAP Size size() const; + CV_WRAP bool empty() const; // Please see cv::Mat for descriptions int flags; int rows, cols; - size_t step; + CV_PROP size_t step; uchar* data; int* refcount; @@ -429,13 +603,13 @@ class CV_EXPORTS HostMem @param m Input matrix. */ -CV_EXPORTS void registerPageLocked(Mat& m); +CV_EXPORTS_W void registerPageLocked(Mat& m); /** @brief Unmaps the memory of matrix and makes it pageable again. @param m Input matrix. */ -CV_EXPORTS void unregisterPageLocked(Mat& m); +CV_EXPORTS_W void unregisterPageLocked(Mat& m); //=================================================================================== // Stream @@ -447,9 +621,28 @@ CV_EXPORTS void unregisterPageLocked(Mat& m); functions use the constant GPU memory, and next call may update the memory before the previous one has been finished. But calling different operations asynchronously is safe because each operation has its own constant buffer. Memory copy/upload/download/set operations to the buffers you hold are -also safe. : +also safe. + +@note The Stream class is not thread-safe. Please use different Stream objects for different CPU threads. + +@code +void thread1() +{ + cv::cuda::Stream stream1; + cv::cuda::func1(..., stream1); +} + +void thread2() +{ + cv::cuda::Stream stream2; + cv::cuda::func2(..., stream2); +} +@endcode + +@note By default all CUDA routines are launched in Stream::Null() object, if the stream is not specified by user. +In multi-threading environment the stream objects must be passed explicitly (see previous note). */ -class CV_EXPORTS Stream +class CV_EXPORTS_W Stream { typedef void (Stream::*bool_type)() const; void this_type_does_not_support_comparisons() const {} @@ -458,19 +651,22 @@ class CV_EXPORTS Stream typedef void (*StreamCallback)(int status, void* userData); //! creates a new asynchronous stream - Stream(); + CV_WRAP Stream(); + + //! creates a new asynchronous stream with custom allocator + CV_WRAP Stream(const Ptr& allocator); /** @brief Returns true if the current stream queue is finished. Otherwise, it returns false. */ - bool queryIfComplete() const; + CV_WRAP bool queryIfComplete() const; /** @brief Blocks the current CPU thread until all operations in the stream are complete. */ - void waitForCompletion(); + CV_WRAP void waitForCompletion(); /** @brief Makes a compute stream wait on an event. */ - void waitEvent(const Event& event); + CV_WRAP void waitEvent(const Event& event); /** @brief Adds a callback to be called on the host after all currently enqueued items in the stream have completed. @@ -483,11 +679,14 @@ class CV_EXPORTS Stream void enqueueHostCallback(StreamCallback callback, void* userData); //! return Stream object for default CUDA stream - static Stream& Null(); + CV_WRAP static Stream& Null(); //! returns true if stream object is not default (!= 0) operator bool_type() const; + //! return Pointer to CUDA stream + CV_WRAP void* cudaPtr() const; + class Impl; private: @@ -499,7 +698,7 @@ class CV_EXPORTS Stream friend class DefaultDeviceInitializer; }; -class CV_EXPORTS Event +class CV_EXPORTS_W Event { public: enum CreateFlags @@ -510,19 +709,19 @@ class CV_EXPORTS Event INTERPROCESS = 0x04 /**< Event is suitable for interprocess use. DisableTiming must be set */ }; - explicit Event(CreateFlags flags = DEFAULT); + CV_WRAP explicit Event(Event::CreateFlags flags = Event::CreateFlags::DEFAULT); //! records an event - void record(Stream& stream = Stream::Null()); + CV_WRAP void record(Stream& stream = Stream::Null()); //! queries an event's status - bool queryIfComplete() const; + CV_WRAP bool queryIfComplete() const; //! waits for an event to complete - void waitForCompletion(); + CV_WRAP void waitForCompletion(); //! computes the elapsed time between events - static float elapsedTime(const Event& start, const Event& end); + CV_WRAP static float elapsedTime(const Event& start, const Event& end); class Impl; @@ -545,9 +744,10 @@ class CV_EXPORTS Event /** @brief Returns the number of installed CUDA-enabled devices. Use this function before any other CUDA functions calls. If OpenCV is compiled without CUDA support, -this function returns 0. +this function returns 0. If the CUDA driver is not installed, or is incompatible, this function +returns -1. */ -CV_EXPORTS int getCudaEnabledDeviceCount(); +CV_EXPORTS_W int getCudaEnabledDeviceCount(); /** @brief Sets a device and initializes it for the current thread. @@ -555,18 +755,18 @@ CV_EXPORTS int getCudaEnabledDeviceCount(); If the call of this function is omitted, a default device is initialized at the fist CUDA usage. */ -CV_EXPORTS void setDevice(int device); +CV_EXPORTS_W void setDevice(int device); /** @brief Returns the current device index set by cuda::setDevice or initialized by default. */ -CV_EXPORTS int getDevice(); +CV_EXPORTS_W int getDevice(); /** @brief Explicitly destroys and cleans up all resources associated with the current device in the current process. Any subsequent API call to this device will reinitialize the device. */ -CV_EXPORTS void resetDevice(); +CV_EXPORTS_W void resetDevice(); /** @brief Enumeration providing CUDA computing features. */ @@ -599,7 +799,7 @@ built for. According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute capability can always be compiled to binary code of greater or equal compute capability". */ -class CV_EXPORTS TargetArchs +class CV_EXPORTS_W TargetArchs { public: /** @brief The following method checks whether the module was built with the support of the given feature: @@ -614,23 +814,23 @@ class CV_EXPORTS TargetArchs @param major Major compute capability version. @param minor Minor compute capability version. */ - static bool has(int major, int minor); - static bool hasPtx(int major, int minor); - static bool hasBin(int major, int minor); - - static bool hasEqualOrLessPtx(int major, int minor); - static bool hasEqualOrGreater(int major, int minor); - static bool hasEqualOrGreaterPtx(int major, int minor); - static bool hasEqualOrGreaterBin(int major, int minor); + CV_WRAP static bool has(int major, int minor); + CV_WRAP static bool hasPtx(int major, int minor); + CV_WRAP static bool hasBin(int major, int minor); + + CV_WRAP static bool hasEqualOrLessPtx(int major, int minor); + CV_WRAP static bool hasEqualOrGreater(int major, int minor); + CV_WRAP static bool hasEqualOrGreaterPtx(int major, int minor); + CV_WRAP static bool hasEqualOrGreaterBin(int major, int minor); }; /** @brief Class providing functionality for querying the specified GPU properties. */ -class CV_EXPORTS DeviceInfo +class CV_EXPORTS_W DeviceInfo { public: //! creates DeviceInfo object for the current GPU - DeviceInfo(); + CV_WRAP DeviceInfo(); /** @brief The constructors. @@ -639,68 +839,68 @@ class CV_EXPORTS DeviceInfo Constructs the DeviceInfo object for the specified device. If device_id parameter is missed, it constructs an object for the current device. */ - DeviceInfo(int device_id); + CV_WRAP DeviceInfo(int device_id); /** @brief Returns system index of the CUDA device starting with 0. */ - int deviceID() const; + CV_WRAP int deviceID() const; //! ASCII string identifying device const char* name() const; //! global memory available on device in bytes - size_t totalGlobalMem() const; + CV_WRAP size_t totalGlobalMem() const; //! shared memory available per block in bytes - size_t sharedMemPerBlock() const; + CV_WRAP size_t sharedMemPerBlock() const; //! 32-bit registers available per block - int regsPerBlock() const; + CV_WRAP int regsPerBlock() const; //! warp size in threads - int warpSize() const; + CV_WRAP int warpSize() const; //! maximum pitch in bytes allowed by memory copies - size_t memPitch() const; + CV_WRAP size_t memPitch() const; //! maximum number of threads per block - int maxThreadsPerBlock() const; + CV_WRAP int maxThreadsPerBlock() const; //! maximum size of each dimension of a block - Vec3i maxThreadsDim() const; + CV_WRAP Vec3i maxThreadsDim() const; //! maximum size of each dimension of a grid - Vec3i maxGridSize() const; + CV_WRAP Vec3i maxGridSize() const; //! clock frequency in kilohertz - int clockRate() const; + CV_WRAP int clockRate() const; //! constant memory available on device in bytes - size_t totalConstMem() const; + CV_WRAP size_t totalConstMem() const; //! major compute capability - int majorVersion() const; + CV_WRAP int majorVersion() const; //! minor compute capability - int minorVersion() const; + CV_WRAP int minorVersion() const; //! alignment requirement for textures - size_t textureAlignment() const; + CV_WRAP size_t textureAlignment() const; //! pitch alignment requirement for texture references bound to pitched memory - size_t texturePitchAlignment() const; + CV_WRAP size_t texturePitchAlignment() const; //! number of multiprocessors on device - int multiProcessorCount() const; + CV_WRAP int multiProcessorCount() const; //! specified whether there is a run time limit on kernels - bool kernelExecTimeoutEnabled() const; + CV_WRAP bool kernelExecTimeoutEnabled() const; //! device is integrated as opposed to discrete - bool integrated() const; + CV_WRAP bool integrated() const; //! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer - bool canMapHostMemory() const; + CV_WRAP bool canMapHostMemory() const; enum ComputeMode { @@ -711,108 +911,108 @@ class CV_EXPORTS DeviceInfo }; //! compute mode - ComputeMode computeMode() const; + CV_WRAP DeviceInfo::ComputeMode computeMode() const; //! maximum 1D texture size - int maxTexture1D() const; + CV_WRAP int maxTexture1D() const; //! maximum 1D mipmapped texture size - int maxTexture1DMipmap() const; + CV_WRAP int maxTexture1DMipmap() const; //! maximum size for 1D textures bound to linear memory - int maxTexture1DLinear() const; + CV_WRAP int maxTexture1DLinear() const; //! maximum 2D texture dimensions - Vec2i maxTexture2D() const; + CV_WRAP Vec2i maxTexture2D() const; //! maximum 2D mipmapped texture dimensions - Vec2i maxTexture2DMipmap() const; + CV_WRAP Vec2i maxTexture2DMipmap() const; //! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory - Vec3i maxTexture2DLinear() const; + CV_WRAP Vec3i maxTexture2DLinear() const; //! maximum 2D texture dimensions if texture gather operations have to be performed - Vec2i maxTexture2DGather() const; + CV_WRAP Vec2i maxTexture2DGather() const; //! maximum 3D texture dimensions - Vec3i maxTexture3D() const; + CV_WRAP Vec3i maxTexture3D() const; //! maximum Cubemap texture dimensions - int maxTextureCubemap() const; + CV_WRAP int maxTextureCubemap() const; //! maximum 1D layered texture dimensions - Vec2i maxTexture1DLayered() const; + CV_WRAP Vec2i maxTexture1DLayered() const; //! maximum 2D layered texture dimensions - Vec3i maxTexture2DLayered() const; + CV_WRAP Vec3i maxTexture2DLayered() const; //! maximum Cubemap layered texture dimensions - Vec2i maxTextureCubemapLayered() const; + CV_WRAP Vec2i maxTextureCubemapLayered() const; //! maximum 1D surface size - int maxSurface1D() const; + CV_WRAP int maxSurface1D() const; //! maximum 2D surface dimensions - Vec2i maxSurface2D() const; + CV_WRAP Vec2i maxSurface2D() const; //! maximum 3D surface dimensions - Vec3i maxSurface3D() const; + CV_WRAP Vec3i maxSurface3D() const; //! maximum 1D layered surface dimensions - Vec2i maxSurface1DLayered() const; + CV_WRAP Vec2i maxSurface1DLayered() const; //! maximum 2D layered surface dimensions - Vec3i maxSurface2DLayered() const; + CV_WRAP Vec3i maxSurface2DLayered() const; //! maximum Cubemap surface dimensions - int maxSurfaceCubemap() const; + CV_WRAP int maxSurfaceCubemap() const; //! maximum Cubemap layered surface dimensions - Vec2i maxSurfaceCubemapLayered() const; + CV_WRAP Vec2i maxSurfaceCubemapLayered() const; //! alignment requirements for surfaces - size_t surfaceAlignment() const; + CV_WRAP size_t surfaceAlignment() const; //! device can possibly execute multiple kernels concurrently - bool concurrentKernels() const; + CV_WRAP bool concurrentKernels() const; //! device has ECC support enabled - bool ECCEnabled() const; + CV_WRAP bool ECCEnabled() const; //! PCI bus ID of the device - int pciBusID() const; + CV_WRAP int pciBusID() const; //! PCI device ID of the device - int pciDeviceID() const; + CV_WRAP int pciDeviceID() const; //! PCI domain ID of the device - int pciDomainID() const; + CV_WRAP int pciDomainID() const; //! true if device is a Tesla device using TCC driver, false otherwise - bool tccDriver() const; + CV_WRAP bool tccDriver() const; //! number of asynchronous engines - int asyncEngineCount() const; + CV_WRAP int asyncEngineCount() const; //! device shares a unified address space with the host - bool unifiedAddressing() const; + CV_WRAP bool unifiedAddressing() const; //! peak memory clock frequency in kilohertz - int memoryClockRate() const; + CV_WRAP int memoryClockRate() const; //! global memory bus width in bits - int memoryBusWidth() const; + CV_WRAP int memoryBusWidth() const; //! size of L2 cache in bytes - int l2CacheSize() const; + CV_WRAP int l2CacheSize() const; //! maximum resident threads per multiprocessor - int maxThreadsPerMultiProcessor() const; + CV_WRAP int maxThreadsPerMultiProcessor() const; //! gets free and total device memory - void queryMemory(size_t& totalMemory, size_t& freeMemory) const; - size_t freeMemory() const; - size_t totalMemory() const; + CV_WRAP void queryMemory(size_t& totalMemory, size_t& freeMemory) const; + CV_WRAP size_t freeMemory() const; + CV_WRAP size_t totalMemory() const; /** @brief Provides information on CUDA feature support. @@ -827,14 +1027,23 @@ class CV_EXPORTS DeviceInfo This function returns true if the CUDA module can be run on the specified device. Otherwise, it returns false . */ - bool isCompatible() const; + CV_WRAP bool isCompatible() const; private: int device_id_; }; -CV_EXPORTS void printCudaDeviceInfo(int device); -CV_EXPORTS void printShortCudaDeviceInfo(int device); +CV_EXPORTS_W void printCudaDeviceInfo(int device); +CV_EXPORTS_W void printShortCudaDeviceInfo(int device); + +/** @brief Converts an array to half precision floating number. + +@param _src input array. +@param _dst output array. +@param stream Stream for the asynchronous version. +@sa convertFp16 +*/ +CV_EXPORTS void convertFp16(InputArray _src, OutputArray _dst, Stream& stream = Stream::Null()); //! @} cudacore_init @@ -843,4 +1052,4 @@ CV_EXPORTS void printShortCudaDeviceInfo(int device); #include "opencv2/core/cuda.inl.hpp" -#endif /* __OPENCV_CORE_CUDA_HPP__ */ +#endif /* OPENCV_CORE_CUDA_HPP */ diff --git a/IPL/include/opencv/opencv2/core/cuda.inl.hpp b/IPL/include/opencv/opencv2/core/cuda.inl.hpp index 01dc6d7..30fc0ae 100644 --- a/IPL/include/opencv/opencv2/core/cuda.inl.hpp +++ b/IPL/include/opencv/opencv2/core/cuda.inl.hpp @@ -41,8 +41,8 @@ // //M*/ -#ifndef __OPENCV_CORE_CUDAINL_HPP__ -#define __OPENCV_CORE_CUDAINL_HPP__ +#ifndef OPENCV_CORE_CUDAINL_HPP +#define OPENCV_CORE_CUDAINL_HPP #include "opencv2/core/cuda.hpp" @@ -343,6 +343,12 @@ bool GpuMat::empty() const return data == 0; } +inline +void* GpuMat::cudaPtr() const +{ + return data; +} + static inline GpuMat createContinuous(int rows, int cols, int type) { @@ -628,4 +634,4 @@ Mat::Mat(const cuda::GpuMat& m) //! @endcond -#endif // __OPENCV_CORE_CUDAINL_HPP__ +#endif // OPENCV_CORE_CUDAINL_HPP diff --git a/IPL/include/opencv/opencv2/core/cuda/block.hpp b/IPL/include/opencv/opencv2/core/cuda/block.hpp index 0c6f063..c277f0e 100644 --- a/IPL/include/opencv/opencv2/core/cuda/block.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/block.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CUDA_DEVICE_BLOCK_HPP__ -#define __OPENCV_CUDA_DEVICE_BLOCK_HPP__ +#ifndef OPENCV_CUDA_DEVICE_BLOCK_HPP +#define OPENCV_CUDA_DEVICE_BLOCK_HPP /** @file * @deprecated Use @ref cudev instead. @@ -106,7 +106,7 @@ namespace cv { namespace cuda { namespace device } template - static __device__ __forceinline__ void transfrom(InIt beg, InIt end, OutIt out, UnOp op) + static __device__ __forceinline__ void transform(InIt beg, InIt end, OutIt out, UnOp op) { int STRIDE = stride(); InIt t = beg + flattenedThreadId(); @@ -117,7 +117,7 @@ namespace cv { namespace cuda { namespace device } template - static __device__ __forceinline__ void transfrom(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op) + static __device__ __forceinline__ void transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op) { int STRIDE = stride(); InIt1 t1 = beg1 + flattenedThreadId(); @@ -208,4 +208,4 @@ namespace cv { namespace cuda { namespace device //! @endcond -#endif /* __OPENCV_CUDA_DEVICE_BLOCK_HPP__ */ +#endif /* OPENCV_CUDA_DEVICE_BLOCK_HPP */ diff --git a/IPL/include/opencv/opencv2/core/cuda/border_interpolate.hpp b/IPL/include/opencv/opencv2/core/cuda/border_interpolate.hpp index a204155..874f705 100644 --- a/IPL/include/opencv/opencv2/core/cuda/border_interpolate.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/border_interpolate.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CUDA_BORDER_INTERPOLATE_HPP__ -#define __OPENCV_CUDA_BORDER_INTERPOLATE_HPP__ +#ifndef OPENCV_CUDA_BORDER_INTERPOLATE_HPP +#define OPENCV_CUDA_BORDER_INTERPOLATE_HPP #include "saturate_cast.hpp" #include "vec_traits.hpp" @@ -719,4 +719,4 @@ namespace cv { namespace cuda { namespace device //! @endcond -#endif // __OPENCV_CUDA_BORDER_INTERPOLATE_HPP__ +#endif // OPENCV_CUDA_BORDER_INTERPOLATE_HPP diff --git a/IPL/include/opencv/opencv2/core/cuda/color.hpp b/IPL/include/opencv/opencv2/core/cuda/color.hpp index 6faf8c9..dcce280 100644 --- a/IPL/include/opencv/opencv2/core/cuda/color.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/color.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CUDA_COLOR_HPP__ -#define __OPENCV_CUDA_COLOR_HPP__ +#ifndef OPENCV_CUDA_COLOR_HPP +#define OPENCV_CUDA_COLOR_HPP #include "detail/color_detail.hpp" @@ -306,4 +306,4 @@ namespace cv { namespace cuda { namespace device //! @endcond -#endif // __OPENCV_CUDA_BORDER_INTERPOLATE_HPP__ +#endif // OPENCV_CUDA_COLOR_HPP diff --git a/IPL/include/opencv/opencv2/core/cuda/common.hpp b/IPL/include/opencv/opencv2/core/cuda/common.hpp index b93c3ef..14b1f3f 100644 --- a/IPL/include/opencv/opencv2/core/cuda/common.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/common.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CUDA_COMMON_HPP__ -#define __OPENCV_CUDA_COMMON_HPP__ +#ifndef OPENCV_CUDA_COMMON_HPP +#define OPENCV_CUDA_COMMON_HPP #include #include "opencv2/core/cuda_types.hpp" @@ -106,4 +106,4 @@ namespace cv { namespace cuda //! @endcond -#endif // __OPENCV_CUDA_COMMON_HPP__ +#endif // OPENCV_CUDA_COMMON_HPP diff --git a/IPL/include/opencv/opencv2/core/cuda/datamov_utils.hpp b/IPL/include/opencv/opencv2/core/cuda/datamov_utils.hpp index bb02cf9..6820d0f 100644 --- a/IPL/include/opencv/opencv2/core/cuda/datamov_utils.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/datamov_utils.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CUDA_DATAMOV_UTILS_HPP__ -#define __OPENCV_CUDA_DATAMOV_UTILS_HPP__ +#ifndef OPENCV_CUDA_DATAMOV_UTILS_HPP +#define OPENCV_CUDA_DATAMOV_UTILS_HPP #include "common.hpp" @@ -110,4 +110,4 @@ namespace cv { namespace cuda { namespace device //! @endcond -#endif // __OPENCV_CUDA_DATAMOV_UTILS_HPP__ +#endif // OPENCV_CUDA_DATAMOV_UTILS_HPP diff --git a/IPL/include/opencv/opencv2/core/cuda/detail/color_detail.hpp b/IPL/include/opencv/opencv2/core/cuda/detail/color_detail.hpp index 1151806..f4b4796 100644 --- a/IPL/include/opencv/opencv2/core/cuda/detail/color_detail.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/detail/color_detail.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CUDA_COLOR_DETAIL_HPP__ -#define __OPENCV_CUDA_COLOR_DETAIL_HPP__ +#ifndef OPENCV_CUDA_COLOR_DETAIL_HPP +#define OPENCV_CUDA_COLOR_DETAIL_HPP #include "../common.hpp" #include "../vec_traits.hpp" @@ -92,13 +92,51 @@ namespace cv { namespace cuda { namespace device return vec.w; } + //constants for conversion from/to RGB and Gray, YUV, YCrCb according to BT.601 + constexpr float B2YF = 0.114f; + constexpr float G2YF = 0.587f; + constexpr float R2YF = 0.299f; + + //to YCbCr + constexpr float YCBF = 0.564f; // == 1/2/(1-B2YF) + constexpr float YCRF = 0.713f; // == 1/2/(1-R2YF) + const int YCBI = 9241; // == YCBF*16384 + const int YCRI = 11682; // == YCRF*16384 + //to YUV + constexpr float B2UF = 0.492f; + constexpr float R2VF = 0.877f; + const int B2UI = 8061; // == B2UF*16384 + const int R2VI = 14369; // == R2VF*16384 + //from YUV + constexpr float U2BF = 2.032f; + constexpr float U2GF = -0.395f; + constexpr float V2GF = -0.581f; + constexpr float V2RF = 1.140f; + const int U2BI = 33292; + const int U2GI = -6472; + const int V2GI = -9519; + const int V2RI = 18678; + //from YCrCb + constexpr float CB2BF = 1.773f; + constexpr float CB2GF = -0.344f; + constexpr float CR2GF = -0.714f; + constexpr float CR2RF = 1.403f; + const int CB2BI = 29049; + const int CB2GI = -5636; + const int CR2GI = -11698; + const int CR2RI = 22987; + enum { yuv_shift = 14, xyz_shift = 12, + gray_shift = 15, R2Y = 4899, G2Y = 9617, B2Y = 1868, + RY15 = 9798, // == R2YF*32768 + 0.5 + GY15 = 19235, // == G2YF*32768 + 0.5 + BY15 = 3735, // == B2YF*32768 + 0.5 BLOCK_SIZE = 256 }; } @@ -406,7 +444,7 @@ namespace cv { namespace cuda { namespace device { static __device__ __forceinline__ uchar cvt(uint t) { - return (uchar)CV_DESCALE(((t << 3) & 0xf8) * B2Y + ((t >> 3) & 0xfc) * G2Y + ((t >> 8) & 0xf8) * R2Y, yuv_shift); + return (uchar)CV_DESCALE(((t << 3) & 0xf8) * BY15 + ((t >> 3) & 0xfc) * GY15 + ((t >> 8) & 0xf8) * RY15, gray_shift); } }; @@ -414,7 +452,7 @@ namespace cv { namespace cuda { namespace device { static __device__ __forceinline__ uchar cvt(uint t) { - return (uchar)CV_DESCALE(((t << 3) & 0xf8) * B2Y + ((t >> 2) & 0xf8) * G2Y + ((t >> 7) & 0xf8) * R2Y, yuv_shift); + return (uchar)CV_DESCALE(((t << 3) & 0xf8) * BY15 + ((t >> 2) & 0xf8) * GY15 + ((t >> 7) & 0xf8) * RY15, gray_shift); } }; @@ -443,7 +481,7 @@ namespace cv { namespace cuda { namespace device { template static __device__ __forceinline__ T RGB2GrayConvert(const T* src) { - return (T)CV_DESCALE((unsigned)(src[bidx] * B2Y + src[1] * G2Y + src[bidx^2] * R2Y), yuv_shift); + return (T)CV_DESCALE((unsigned)(src[bidx] * BY15 + src[1] * GY15 + src[bidx^2] * RY15), gray_shift); } template static __device__ __forceinline__ uchar RGB2GrayConvert(uint src) @@ -451,12 +489,12 @@ namespace cv { namespace cuda { namespace device uint b = 0xffu & (src >> (bidx * 8)); uint g = 0xffu & (src >> 8); uint r = 0xffu & (src >> ((bidx ^ 2) * 8)); - return CV_DESCALE((uint)(b * B2Y + g * G2Y + r * R2Y), yuv_shift); + return CV_DESCALE((uint)(b * BY15 + g * GY15 + r * RY15), gray_shift); } template static __device__ __forceinline__ float RGB2GrayConvert(const float* src) { - return src[bidx] * 0.114f + src[1] * 0.587f + src[bidx^2] * 0.299f; + return src[bidx] * B2YF + src[1] * G2YF + src[bidx^2] * R2YF; } template struct RGB2Gray : unary_function::vec_type, T> @@ -494,8 +532,8 @@ namespace cv { namespace cuda { namespace device namespace color_detail { - __constant__ float c_RGB2YUVCoeffs_f[5] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f }; - __constant__ int c_RGB2YUVCoeffs_i[5] = { B2Y, G2Y, R2Y, 8061, 14369 }; + __constant__ float c_RGB2YUVCoeffs_f[5] = { B2YF, G2YF, R2YF, B2UF, R2VF }; + __constant__ int c_RGB2YUVCoeffs_i[5] = { B2Y, G2Y, R2Y, B2UI, R2VI }; template static __device__ void RGB2YUVConvert(const T* src, D& dst) { @@ -543,8 +581,8 @@ namespace cv { namespace cuda { namespace device namespace color_detail { - __constant__ float c_YUV2RGBCoeffs_f[5] = { 2.032f, -0.395f, -0.581f, 1.140f }; - __constant__ int c_YUV2RGBCoeffs_i[5] = { 33292, -6472, -9519, 18678 }; + __constant__ float c_YUV2RGBCoeffs_f[5] = { U2BF, U2GF, V2GF, V2RF }; + __constant__ int c_YUV2RGBCoeffs_i[5] = { U2BI, U2GI, V2GI, V2RI }; template static __device__ void YUV2RGBConvert(const T& src, D* dst) { @@ -633,8 +671,8 @@ namespace cv { namespace cuda { namespace device namespace color_detail { - __constant__ float c_RGB2YCrCbCoeffs_f[5] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f}; - __constant__ int c_RGB2YCrCbCoeffs_i[5] = {R2Y, G2Y, B2Y, 11682, 9241}; + __constant__ float c_RGB2YCrCbCoeffs_f[5] = {R2YF, G2YF, B2YF, YCRF, YCBF}; + __constant__ int c_RGB2YCrCbCoeffs_i[5] = {R2Y, G2Y, B2Y, YCRI, YCBI}; template static __device__ void RGB2YCrCbConvert(const T* src, D& dst) { @@ -710,8 +748,8 @@ namespace cv { namespace cuda { namespace device namespace color_detail { - __constant__ float c_YCrCb2RGBCoeffs_f[5] = {1.403f, -0.714f, -0.344f, 1.773f}; - __constant__ int c_YCrCb2RGBCoeffs_i[5] = {22987, -11698, -5636, 29049}; + __constant__ float c_YCrCb2RGBCoeffs_f[5] = {CR2RF, CR2GF, CB2GF, CB2BF}; + __constant__ int c_YCrCb2RGBCoeffs_i[5] = {CR2RI, CR2GI, CB2GI, CB2BI}; template static __device__ void YCrCb2RGBConvert(const T& src, D* dst) { @@ -1977,4 +2015,4 @@ namespace cv { namespace cuda { namespace device //! @endcond -#endif // __OPENCV_CUDA_COLOR_DETAIL_HPP__ +#endif // OPENCV_CUDA_COLOR_DETAIL_HPP diff --git a/IPL/include/opencv/opencv2/core/cuda/detail/reduce.hpp b/IPL/include/opencv/opencv2/core/cuda/detail/reduce.hpp index 44400c8..8af20b0 100644 --- a/IPL/include/opencv/opencv2/core/cuda/detail/reduce.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/detail/reduce.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CUDA_REDUCE_DETAIL_HPP__ -#define __OPENCV_CUDA_REDUCE_DETAIL_HPP__ +#ifndef OPENCV_CUDA_REDUCE_DETAIL_HPP +#define OPENCV_CUDA_REDUCE_DETAIL_HPP #include #include "../warp.hpp" @@ -276,8 +276,8 @@ namespace cv { namespace cuda { namespace device static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op) { #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 - (void) smem; - (void) tid; + CV_UNUSED(smem); + CV_UNUSED(tid); Unroll::loopShfl(val, op, N); #else @@ -362,4 +362,4 @@ namespace cv { namespace cuda { namespace device //! @endcond -#endif // __OPENCV_CUDA_REDUCE_DETAIL_HPP__ +#endif // OPENCV_CUDA_REDUCE_DETAIL_HPP diff --git a/IPL/include/opencv/opencv2/core/cuda/detail/reduce_key_val.hpp b/IPL/include/opencv/opencv2/core/cuda/detail/reduce_key_val.hpp index bab85d7..df37c17 100644 --- a/IPL/include/opencv/opencv2/core/cuda/detail/reduce_key_val.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/detail/reduce_key_val.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP__ -#define __OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP__ +#ifndef OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP +#define OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP #include #include "../warp.hpp" @@ -402,9 +402,9 @@ namespace cv { namespace cuda { namespace device static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp) { #if 0 // __CUDA_ARCH__ >= 300 - (void) skeys; - (void) svals; - (void) tid; + CV_UNUSED(skeys); + CV_UNUSED(svals); + CV_UNUSED(tid); Unroll::loopShfl(key, val, cmp, N); #else @@ -499,4 +499,4 @@ namespace cv { namespace cuda { namespace device //! @endcond -#endif // __OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP__ +#endif // OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP diff --git a/IPL/include/opencv/opencv2/core/cuda/detail/transform_detail.hpp b/IPL/include/opencv/opencv2/core/cuda/detail/transform_detail.hpp index 96031c8..1919848 100644 --- a/IPL/include/opencv/opencv2/core/cuda/detail/transform_detail.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/detail/transform_detail.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CUDA_TRANSFORM_DETAIL_HPP__ -#define __OPENCV_CUDA_TRANSFORM_DETAIL_HPP__ +#ifndef OPENCV_CUDA_TRANSFORM_DETAIL_HPP +#define OPENCV_CUDA_TRANSFORM_DETAIL_HPP #include "../common.hpp" #include "../vec_traits.hpp" @@ -223,11 +223,7 @@ namespace cv { namespace cuda { namespace device if (x_shifted + ft::smart_shift - 1 < src_.cols) { const read_type src_n_el = ((const read_type*)src)[x]; - write_type dst_n_el = ((const write_type*)dst)[x]; - - OpUnroller::unroll(src_n_el, dst_n_el, mask, op, x_shifted, y); - - ((write_type*)dst)[x] = dst_n_el; + OpUnroller::unroll(src_n_el, ((write_type*)dst)[x], mask, op, x_shifted, y); } else { @@ -275,11 +271,8 @@ namespace cv { namespace cuda { namespace device { const read_type1 src1_n_el = ((const read_type1*)src1)[x]; const read_type2 src2_n_el = ((const read_type2*)src2)[x]; - write_type dst_n_el = ((const write_type*)dst)[x]; - - OpUnroller::unroll(src1_n_el, src2_n_el, dst_n_el, mask, op, x_shifted, y); - ((write_type*)dst)[x] = dst_n_el; + OpUnroller::unroll(src1_n_el, src2_n_el, ((write_type*)dst)[x], mask, op, x_shifted, y); } else { @@ -396,4 +389,4 @@ namespace cv { namespace cuda { namespace device //! @endcond -#endif // __OPENCV_CUDA_TRANSFORM_DETAIL_HPP__ +#endif // OPENCV_CUDA_TRANSFORM_DETAIL_HPP diff --git a/IPL/include/opencv/opencv2/core/cuda/detail/type_traits_detail.hpp b/IPL/include/opencv/opencv2/core/cuda/detail/type_traits_detail.hpp index 3463c78..a78bd2c 100644 --- a/IPL/include/opencv/opencv2/core/cuda/detail/type_traits_detail.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/detail/type_traits_detail.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP__ -#define __OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP__ +#ifndef OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP +#define OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP #include "../common.hpp" #include "../vec_traits.hpp" @@ -188,4 +188,4 @@ namespace cv { namespace cuda { namespace device //! @endcond -#endif // __OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP__ +#endif // OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP diff --git a/IPL/include/opencv/opencv2/core/cuda/detail/vec_distance_detail.hpp b/IPL/include/opencv/opencv2/core/cuda/detail/vec_distance_detail.hpp index 9ca85a5..8283a99 100644 --- a/IPL/include/opencv/opencv2/core/cuda/detail/vec_distance_detail.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/detail/vec_distance_detail.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP__ -#define __OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP__ +#ifndef OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP +#define OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP #include "../datamov_utils.hpp" @@ -118,4 +118,4 @@ namespace cv { namespace cuda { namespace device //! @endcond -#endif // __OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP__ +#endif // OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP diff --git a/IPL/include/opencv/opencv2/core/cuda/dynamic_smem.hpp b/IPL/include/opencv/opencv2/core/cuda/dynamic_smem.hpp index 3488463..42570c6 100644 --- a/IPL/include/opencv/opencv2/core/cuda/dynamic_smem.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/dynamic_smem.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CUDA_DYNAMIC_SMEM_HPP__ -#define __OPENCV_CUDA_DYNAMIC_SMEM_HPP__ +#ifndef OPENCV_CUDA_DYNAMIC_SMEM_HPP +#define OPENCV_CUDA_DYNAMIC_SMEM_HPP /** @file * @deprecated Use @ref cudev instead. @@ -85,4 +85,4 @@ namespace cv { namespace cuda { namespace device //! @endcond -#endif // __OPENCV_CUDA_DYNAMIC_SMEM_HPP__ +#endif // OPENCV_CUDA_DYNAMIC_SMEM_HPP diff --git a/IPL/include/opencv/opencv2/core/cuda/emulation.hpp b/IPL/include/opencv/opencv2/core/cuda/emulation.hpp index d346865..17dc117 100644 --- a/IPL/include/opencv/opencv2/core/cuda/emulation.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/emulation.hpp @@ -177,8 +177,8 @@ namespace cv { namespace cuda { namespace device } while (assumed != old); return __longlong_as_double(old); #else - (void) address; - (void) val; + CV_UNUSED(address); + CV_UNUSED(val); return 0.0; #endif } @@ -199,8 +199,8 @@ namespace cv { namespace cuda { namespace device } while (assumed != old); return __int_as_float(old); #else - (void) address; - (void) val; + CV_UNUSED(address); + CV_UNUSED(val); return 0.0f; #endif } @@ -216,8 +216,8 @@ namespace cv { namespace cuda { namespace device } while (assumed != old); return __longlong_as_double(old); #else - (void) address; - (void) val; + CV_UNUSED(address); + CV_UNUSED(val); return 0.0; #endif } @@ -238,8 +238,8 @@ namespace cv { namespace cuda { namespace device } while (assumed != old); return __int_as_float(old); #else - (void) address; - (void) val; + CV_UNUSED(address); + CV_UNUSED(val); return 0.0f; #endif } @@ -255,8 +255,8 @@ namespace cv { namespace cuda { namespace device } while (assumed != old); return __longlong_as_double(old); #else - (void) address; - (void) val; + CV_UNUSED(address); + CV_UNUSED(val); return 0.0; #endif } diff --git a/IPL/include/opencv/opencv2/core/cuda/filters.hpp b/IPL/include/opencv/opencv2/core/cuda/filters.hpp index 9adc00c..bb94212 100644 --- a/IPL/include/opencv/opencv2/core/cuda/filters.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/filters.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CUDA_FILTERS_HPP__ -#define __OPENCV_CUDA_FILTERS_HPP__ +#ifndef OPENCV_CUDA_FILTERS_HPP +#define OPENCV_CUDA_FILTERS_HPP #include "saturate_cast.hpp" #include "vec_traits.hpp" @@ -64,8 +64,8 @@ namespace cv { namespace cuda { namespace device explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f) : src(src_) { - (void)fx; - (void)fy; + CV_UNUSED(fx); + CV_UNUSED(fy); } __device__ __forceinline__ elem_type operator ()(float y, float x) const @@ -84,8 +84,8 @@ namespace cv { namespace cuda { namespace device explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f) : src(src_) { - (void)fx; - (void)fy; + CV_UNUSED(fx); + CV_UNUSED(fy); } __device__ __forceinline__ elem_type operator ()(float y, float x) const { @@ -125,8 +125,8 @@ namespace cv { namespace cuda { namespace device explicit __host__ __device__ __forceinline__ CubicFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f) : src(src_) { - (void)fx; - (void)fy; + CV_UNUSED(fx); + CV_UNUSED(fy); } static __device__ __forceinline__ float bicubicCoeff(float x_) @@ -283,4 +283,4 @@ namespace cv { namespace cuda { namespace device //! @endcond -#endif // __OPENCV_CUDA_FILTERS_HPP__ +#endif // OPENCV_CUDA_FILTERS_HPP diff --git a/IPL/include/opencv/opencv2/core/cuda/funcattrib.hpp b/IPL/include/opencv/opencv2/core/cuda/funcattrib.hpp index fbb236b..f582080 100644 --- a/IPL/include/opencv/opencv2/core/cuda/funcattrib.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/funcattrib.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP_ -#define __OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP_ +#ifndef OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP +#define OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP #include @@ -76,4 +76,4 @@ namespace cv { namespace cuda { namespace device //! @endcond -#endif /* __OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP_ */ +#endif /* OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP */ diff --git a/IPL/include/opencv/opencv2/core/cuda/functional.hpp b/IPL/include/opencv/opencv2/core/cuda/functional.hpp index ed3943d..9f53d87 100644 --- a/IPL/include/opencv/opencv2/core/cuda/functional.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/functional.hpp @@ -40,14 +40,13 @@ // //M*/ -#ifndef __OPENCV_CUDA_FUNCTIONAL_HPP__ -#define __OPENCV_CUDA_FUNCTIONAL_HPP__ +#ifndef OPENCV_CUDA_FUNCTIONAL_HPP +#define OPENCV_CUDA_FUNCTIONAL_HPP #include #include "saturate_cast.hpp" #include "vec_traits.hpp" #include "type_traits.hpp" -#include "device_functions.h" /** @file * @deprecated Use @ref cudev instead. @@ -58,8 +57,17 @@ namespace cv { namespace cuda { namespace device { // Function Objects - template struct unary_function : public std::unary_function {}; - template struct binary_function : public std::binary_function {}; + template struct unary_function + { + typedef Argument argument_type; + typedef Result result_type; + }; + template struct binary_function + { + typedef Argument1 first_argument_type; + typedef Argument2 second_argument_type; + typedef Result result_type; + }; // Arithmetic Operations template struct plus : binary_function @@ -583,7 +591,7 @@ namespace cv { namespace cuda { namespace device template struct thresh_trunc_func : unary_function { - explicit __host__ __device__ __forceinline__ thresh_trunc_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {(void)maxVal_;} + explicit __host__ __device__ __forceinline__ thresh_trunc_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {CV_UNUSED(maxVal_);} __device__ __forceinline__ T operator()(typename TypeTraits::ParameterType src) const { @@ -599,7 +607,7 @@ namespace cv { namespace cuda { namespace device template struct thresh_to_zero_func : unary_function { - explicit __host__ __device__ __forceinline__ thresh_to_zero_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {(void)maxVal_;} + explicit __host__ __device__ __forceinline__ thresh_to_zero_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {CV_UNUSED(maxVal_);} __device__ __forceinline__ T operator()(typename TypeTraits::ParameterType src) const { @@ -615,7 +623,7 @@ namespace cv { namespace cuda { namespace device template struct thresh_to_zero_inv_func : unary_function { - explicit __host__ __device__ __forceinline__ thresh_to_zero_inv_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {(void)maxVal_;} + explicit __host__ __device__ __forceinline__ thresh_to_zero_inv_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {CV_UNUSED(maxVal_);} __device__ __forceinline__ T operator()(typename TypeTraits::ParameterType src) const { @@ -794,4 +802,4 @@ namespace cv { namespace cuda { namespace device //! @endcond -#endif // __OPENCV_CUDA_FUNCTIONAL_HPP__ +#endif // OPENCV_CUDA_FUNCTIONAL_HPP diff --git a/IPL/include/opencv/opencv2/core/cuda/limits.hpp b/IPL/include/opencv/opencv2/core/cuda/limits.hpp index b98bdf2..7e15ed6 100644 --- a/IPL/include/opencv/opencv2/core/cuda/limits.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/limits.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CUDA_LIMITS_HPP__ -#define __OPENCV_CUDA_LIMITS_HPP__ +#ifndef OPENCV_CUDA_LIMITS_HPP +#define OPENCV_CUDA_LIMITS_HPP #include #include @@ -125,4 +125,4 @@ template <> struct numeric_limits //! @endcond -#endif // __OPENCV_CUDA_LIMITS_HPP__ +#endif // OPENCV_CUDA_LIMITS_HPP diff --git a/IPL/include/opencv/opencv2/core/cuda/reduce.hpp b/IPL/include/opencv/opencv2/core/cuda/reduce.hpp index 3133c9a..5de3650 100644 --- a/IPL/include/opencv/opencv2/core/cuda/reduce.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/reduce.hpp @@ -40,8 +40,12 @@ // //M*/ -#ifndef __OPENCV_CUDA_REDUCE_HPP__ -#define __OPENCV_CUDA_REDUCE_HPP__ +#ifndef OPENCV_CUDA_REDUCE_HPP +#define OPENCV_CUDA_REDUCE_HPP + +#ifndef THRUST_DEBUG // eliminate -Wundef warning +#define THRUST_DEBUG 0 +#endif #include #include "detail/reduce.hpp" @@ -202,4 +206,4 @@ namespace cv { namespace cuda { namespace device //! @endcond -#endif // __OPENCV_CUDA_UTILITY_HPP__ +#endif // OPENCV_CUDA_REDUCE_HPP diff --git a/IPL/include/opencv/opencv2/core/cuda/saturate_cast.hpp b/IPL/include/opencv/opencv2/core/cuda/saturate_cast.hpp index f55ae4f..c3a3d1c 100644 --- a/IPL/include/opencv/opencv2/core/cuda/saturate_cast.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/saturate_cast.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CUDA_SATURATE_CAST_HPP__ -#define __OPENCV_CUDA_SATURATE_CAST_HPP__ +#ifndef OPENCV_CUDA_SATURATE_CAST_HPP +#define OPENCV_CUDA_SATURATE_CAST_HPP #include "common.hpp" @@ -289,4 +289,4 @@ namespace cv { namespace cuda { namespace device //! @endcond -#endif /* __OPENCV_CUDA_SATURATE_CAST_HPP__ */ +#endif /* OPENCV_CUDA_SATURATE_CAST_HPP */ diff --git a/IPL/include/opencv/opencv2/core/cuda/scan.hpp b/IPL/include/opencv/opencv2/core/cuda/scan.hpp index 687abb5..e128fb0 100644 --- a/IPL/include/opencv/opencv2/core/cuda/scan.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/scan.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CUDA_SCAN_HPP__ -#define __OPENCV_CUDA_SCAN_HPP__ +#ifndef OPENCV_CUDA_SCAN_HPP +#define OPENCV_CUDA_SCAN_HPP #include "opencv2/core/cuda/common.hpp" #include "opencv2/core/cuda/utility.hpp" @@ -61,7 +61,7 @@ namespace cv { namespace cuda { namespace device template struct WarpScan { __device__ __forceinline__ WarpScan() {} - __device__ __forceinline__ WarpScan(const WarpScan& other) { (void)other; } + __device__ __forceinline__ WarpScan(const WarpScan& other) { CV_UNUSED(other); } __device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx) { @@ -95,7 +95,7 @@ namespace cv { namespace cuda { namespace device template struct WarpScanNoComp { __device__ __forceinline__ WarpScanNoComp() {} - __device__ __forceinline__ WarpScanNoComp(const WarpScanNoComp& other) { (void)other; } + __device__ __forceinline__ WarpScanNoComp(const WarpScanNoComp& other) { CV_UNUSED(other); } __device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx) { @@ -135,7 +135,7 @@ namespace cv { namespace cuda { namespace device template struct BlockScan { __device__ __forceinline__ BlockScan() {} - __device__ __forceinline__ BlockScan(const BlockScan& other) { (void)other; } + __device__ __forceinline__ BlockScan(const BlockScan& other) { CV_UNUSED(other); } __device__ __forceinline__ T operator()(volatile T *ptr) { @@ -255,4 +255,4 @@ namespace cv { namespace cuda { namespace device //! @endcond -#endif // __OPENCV_CUDA_SCAN_HPP__ +#endif // OPENCV_CUDA_SCAN_HPP diff --git a/IPL/include/opencv/opencv2/core/cuda/simd_functions.hpp b/IPL/include/opencv/opencv2/core/cuda/simd_functions.hpp index b9e0041..3d8c2e0 100644 --- a/IPL/include/opencv/opencv2/core/cuda/simd_functions.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/simd_functions.hpp @@ -70,8 +70,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef __OPENCV_CUDA_SIMD_FUNCTIONS_HPP__ -#define __OPENCV_CUDA_SIMD_FUNCTIONS_HPP__ +#ifndef OPENCV_CUDA_SIMD_FUNCTIONS_HPP +#define OPENCV_CUDA_SIMD_FUNCTIONS_HPP #include "common.hpp" @@ -866,4 +866,4 @@ namespace cv { namespace cuda { namespace device //! @endcond -#endif // __OPENCV_CUDA_SIMD_FUNCTIONS_HPP__ +#endif // OPENCV_CUDA_SIMD_FUNCTIONS_HPP diff --git a/IPL/include/opencv/opencv2/core/cuda/transform.hpp b/IPL/include/opencv/opencv2/core/cuda/transform.hpp index 08a313d..42aa6ea 100644 --- a/IPL/include/opencv/opencv2/core/cuda/transform.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/transform.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CUDA_TRANSFORM_HPP__ -#define __OPENCV_CUDA_TRANSFORM_HPP__ +#ifndef OPENCV_CUDA_TRANSFORM_HPP +#define OPENCV_CUDA_TRANSFORM_HPP #include "common.hpp" #include "utility.hpp" @@ -72,4 +72,4 @@ namespace cv { namespace cuda { namespace device //! @endcond -#endif // __OPENCV_CUDA_TRANSFORM_HPP__ +#endif // OPENCV_CUDA_TRANSFORM_HPP diff --git a/IPL/include/opencv/opencv2/core/cuda/type_traits.hpp b/IPL/include/opencv/opencv2/core/cuda/type_traits.hpp index f2471eb..8b7a3fd 100644 --- a/IPL/include/opencv/opencv2/core/cuda/type_traits.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/type_traits.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CUDA_TYPE_TRAITS_HPP__ -#define __OPENCV_CUDA_TYPE_TRAITS_HPP__ +#ifndef OPENCV_CUDA_TYPE_TRAITS_HPP +#define OPENCV_CUDA_TYPE_TRAITS_HPP #include "detail/type_traits_detail.hpp" @@ -87,4 +87,4 @@ namespace cv { namespace cuda { namespace device //! @endcond -#endif // __OPENCV_CUDA_TYPE_TRAITS_HPP__ +#endif // OPENCV_CUDA_TYPE_TRAITS_HPP diff --git a/IPL/include/opencv/opencv2/core/cuda/utility.hpp b/IPL/include/opencv/opencv2/core/cuda/utility.hpp index ed60471..7f5db48 100644 --- a/IPL/include/opencv/opencv2/core/cuda/utility.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/utility.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CUDA_UTILITY_HPP__ -#define __OPENCV_CUDA_UTILITY_HPP__ +#ifndef OPENCV_CUDA_UTILITY_HPP +#define OPENCV_CUDA_UTILITY_HPP #include "saturate_cast.hpp" #include "datamov_utils.hpp" @@ -54,6 +54,15 @@ namespace cv { namespace cuda { namespace device { + struct CV_EXPORTS ThrustAllocator + { + typedef uchar value_type; + virtual ~ThrustAllocator(); + virtual __device__ __host__ uchar* allocate(size_t numBytes) = 0; + virtual __device__ __host__ void deallocate(uchar* ptr, size_t numBytes) = 0; + static ThrustAllocator& getAllocator(); + static void setAllocator(ThrustAllocator* allocator); + }; #define OPENCV_CUDA_LOG_WARP_SIZE (5) #define OPENCV_CUDA_WARP_SIZE (1 << OPENCV_CUDA_LOG_WARP_SIZE) #define OPENCV_CUDA_LOG_MEM_BANKS ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla @@ -218,4 +227,4 @@ namespace cv { namespace cuda { namespace device //! @endcond -#endif // __OPENCV_CUDA_UTILITY_HPP__ +#endif // OPENCV_CUDA_UTILITY_HPP diff --git a/IPL/include/opencv/opencv2/core/cuda/vec_distance.hpp b/IPL/include/opencv/opencv2/core/cuda/vec_distance.hpp index 013b747..ef6e510 100644 --- a/IPL/include/opencv/opencv2/core/cuda/vec_distance.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/vec_distance.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CUDA_VEC_DISTANCE_HPP__ -#define __OPENCV_CUDA_VEC_DISTANCE_HPP__ +#ifndef OPENCV_CUDA_VEC_DISTANCE_HPP +#define OPENCV_CUDA_VEC_DISTANCE_HPP #include "reduce.hpp" #include "functional.hpp" @@ -229,4 +229,4 @@ namespace cv { namespace cuda { namespace device //! @endcond -#endif // __OPENCV_CUDA_VEC_DISTANCE_HPP__ +#endif // OPENCV_CUDA_VEC_DISTANCE_HPP diff --git a/IPL/include/opencv/opencv2/core/cuda/vec_math.hpp b/IPL/include/opencv/opencv2/core/cuda/vec_math.hpp index 8595fb8..80b1303 100644 --- a/IPL/include/opencv/opencv2/core/cuda/vec_math.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/vec_math.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CUDA_VECMATH_HPP__ -#define __OPENCV_CUDA_VECMATH_HPP__ +#ifndef OPENCV_CUDA_VECMATH_HPP +#define OPENCV_CUDA_VECMATH_HPP #include "vec_traits.hpp" #include "saturate_cast.hpp" @@ -199,14 +199,7 @@ CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uint, uint) return VecTraits::make(func (a.x), func (a.y), func (a.z), func (a.w)); \ } -CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uchar, uchar) -CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, char, char) -CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, ushort, ushort) -CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, short, short) -CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, int, int) -CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uint, uint) CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabsf, float, float) -CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabs, double, double) CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uchar, float) CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, char, float) @@ -927,4 +920,4 @@ CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, double, double, double) //! @endcond -#endif // __OPENCV_CUDA_VECMATH_HPP__ +#endif // OPENCV_CUDA_VECMATH_HPP diff --git a/IPL/include/opencv/opencv2/core/cuda/vec_traits.hpp b/IPL/include/opencv/opencv2/core/cuda/vec_traits.hpp index 905e37f..b5ff281 100644 --- a/IPL/include/opencv/opencv2/core/cuda/vec_traits.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/vec_traits.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CUDA_VEC_TRAITS_HPP__ -#define __OPENCV_CUDA_VEC_TRAITS_HPP__ +#ifndef OPENCV_CUDA_VEC_TRAITS_HPP +#define OPENCV_CUDA_VEC_TRAITS_HPP #include "common.hpp" @@ -285,4 +285,4 @@ namespace cv { namespace cuda { namespace device //! @endcond -#endif // __OPENCV_CUDA_VEC_TRAITS_HPP__ +#endif // OPENCV_CUDA_VEC_TRAITS_HPP diff --git a/IPL/include/opencv/opencv2/core/cuda/warp.hpp b/IPL/include/opencv/opencv2/core/cuda/warp.hpp index d93afe7..8af7e6a 100644 --- a/IPL/include/opencv/opencv2/core/cuda/warp.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/warp.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CUDA_DEVICE_WARP_HPP__ -#define __OPENCV_CUDA_DEVICE_WARP_HPP__ +#ifndef OPENCV_CUDA_DEVICE_WARP_HPP +#define OPENCV_CUDA_DEVICE_WARP_HPP /** @file * @deprecated Use @ref cudev instead. @@ -64,7 +64,7 @@ namespace cv { namespace cuda { namespace device static __device__ __forceinline__ unsigned int laneId() { unsigned int ret; - asm("mov.u32 %0, %laneid;" : "=r"(ret) ); + asm("mov.u32 %0, %%laneid;" : "=r"(ret) ); return ret; } @@ -136,4 +136,4 @@ namespace cv { namespace cuda { namespace device //! @endcond -#endif /* __OPENCV_CUDA_DEVICE_WARP_HPP__ */ +#endif /* OPENCV_CUDA_DEVICE_WARP_HPP */ diff --git a/IPL/include/opencv/opencv2/core/cuda/warp_shuffle.hpp b/IPL/include/opencv/opencv2/core/cuda/warp_shuffle.hpp index 256fc2a..0da54ae 100644 --- a/IPL/include/opencv/opencv2/core/cuda/warp_shuffle.hpp +++ b/IPL/include/opencv/opencv2/core/cuda/warp_shuffle.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CUDA_WARP_SHUFFLE_HPP__ -#define __OPENCV_CUDA_WARP_SHUFFLE_HPP__ +#ifndef OPENCV_CUDA_WARP_SHUFFLE_HPP +#define OPENCV_CUDA_WARP_SHUFFLE_HPP /** @file * @deprecated Use @ref cudev instead. @@ -51,6 +51,11 @@ namespace cv { namespace cuda { namespace device { +#if __CUDACC_VER_MAJOR__ >= 9 +# define __shfl(x, y, z) __shfl_sync(0xFFFFFFFFU, x, y, z) +# define __shfl_up(x, y, z) __shfl_up_sync(0xFFFFFFFFU, x, y, z) +# define __shfl_down(x, y, z) __shfl_down_sync(0xFFFFFFFFU, x, y, z) +#endif template __device__ __forceinline__ T shfl(T val, int srcLane, int width = warpSize) { @@ -148,6 +153,10 @@ namespace cv { namespace cuda { namespace device } }}} +# undef __shfl +# undef __shfl_up +# undef __shfl_down + //! @endcond -#endif // __OPENCV_CUDA_WARP_SHUFFLE_HPP__ +#endif // OPENCV_CUDA_WARP_SHUFFLE_HPP diff --git a/IPL/include/opencv/opencv2/core/cuda_stream_accessor.hpp b/IPL/include/opencv/opencv2/core/cuda_stream_accessor.hpp index 0f8ee9b..deaf356 100644 --- a/IPL/include/opencv/opencv2/core/cuda_stream_accessor.hpp +++ b/IPL/include/opencv/opencv2/core/cuda_stream_accessor.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP__ -#define __OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP__ +#ifndef OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP +#define OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP #ifndef __cplusplus # error cuda_stream_accessor.hpp header must be compiled as C++ @@ -83,4 +83,4 @@ namespace cv } } -#endif /* __OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP__ */ +#endif /* OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP */ diff --git a/IPL/include/opencv/opencv2/core/cuda_types.hpp b/IPL/include/opencv/opencv2/core/cuda_types.hpp index 8df816e..45dc2ca 100644 --- a/IPL/include/opencv/opencv2/core/cuda_types.hpp +++ b/IPL/include/opencv/opencv2/core/cuda_types.hpp @@ -40,13 +40,20 @@ // //M*/ -#ifndef __OPENCV_CORE_CUDA_TYPES_HPP__ -#define __OPENCV_CORE_CUDA_TYPES_HPP__ +#ifndef OPENCV_CORE_CUDA_TYPES_HPP +#define OPENCV_CORE_CUDA_TYPES_HPP #ifndef __cplusplus # error cuda_types.hpp header must be compiled as C++ #endif +#if defined(__OPENCV_BUILD) && defined(__clang__) +#pragma clang diagnostic ignored "-Winconsistent-missing-override" +#endif +#if defined(__OPENCV_BUILD) && defined(__GNUC__) && __GNUC__ >= 5 +#pragma GCC diagnostic ignored "-Wsuggest-override" +#endif + /** @file * @deprecated Use @ref cudev instead. */ @@ -120,10 +127,12 @@ namespace cv }; typedef PtrStepSz PtrStepSzb; + typedef PtrStepSz PtrStepSzus; typedef PtrStepSz PtrStepSzf; typedef PtrStepSz PtrStepSzi; typedef PtrStep PtrStepb; + typedef PtrStep PtrStepus; typedef PtrStep PtrStepf; typedef PtrStep PtrStepi; @@ -132,4 +141,4 @@ namespace cv //! @endcond -#endif /* __OPENCV_CORE_CUDA_TYPES_HPP__ */ +#endif /* OPENCV_CORE_CUDA_TYPES_HPP */ diff --git a/IPL/include/opencv/opencv2/core/cv_cpu_dispatch.h b/IPL/include/opencv/opencv2/core/cv_cpu_dispatch.h new file mode 100644 index 0000000..42651ae --- /dev/null +++ b/IPL/include/opencv/opencv2/core/cv_cpu_dispatch.h @@ -0,0 +1,345 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#if defined __OPENCV_BUILD \ + +#include "cv_cpu_config.h" +#include "cv_cpu_helper.h" + +#ifdef CV_CPU_DISPATCH_MODE +#define CV_CPU_OPTIMIZATION_NAMESPACE __CV_CAT(opt_, CV_CPU_DISPATCH_MODE) +#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace __CV_CAT(opt_, CV_CPU_DISPATCH_MODE) { +#define CV_CPU_OPTIMIZATION_NAMESPACE_END } +#else +#define CV_CPU_OPTIMIZATION_NAMESPACE cpu_baseline +#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace cpu_baseline { +#define CV_CPU_OPTIMIZATION_NAMESPACE_END } +#define CV_CPU_BASELINE_MODE 1 +#endif + + +#define __CV_CPU_DISPATCH_CHAIN_END(fn, args, mode, ...) /* done */ +#define __CV_CPU_DISPATCH(fn, args, mode, ...) __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) +#define __CV_CPU_DISPATCH_EXPAND(fn, args, ...) __CV_EXPAND(__CV_CPU_DISPATCH(fn, args, __VA_ARGS__)) +#define CV_CPU_DISPATCH(fn, args, ...) __CV_CPU_DISPATCH_EXPAND(fn, args, __VA_ARGS__, END) // expand macros + + +#if defined CV_ENABLE_INTRINSICS \ + && !defined CV_DISABLE_OPTIMIZATION \ + && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */ \ + +#ifdef CV_CPU_COMPILE_SSE2 +# include +# define CV_MMX 1 +# define CV_SSE 1 +# define CV_SSE2 1 +#endif +#ifdef CV_CPU_COMPILE_SSE3 +# include +# define CV_SSE3 1 +#endif +#ifdef CV_CPU_COMPILE_SSSE3 +# include +# define CV_SSSE3 1 +#endif +#ifdef CV_CPU_COMPILE_SSE4_1 +# include +# define CV_SSE4_1 1 +#endif +#ifdef CV_CPU_COMPILE_SSE4_2 +# include +# define CV_SSE4_2 1 +#endif +#ifdef CV_CPU_COMPILE_POPCNT +# ifdef _MSC_VER +# include +# if defined(_M_X64) +# define CV_POPCNT_U64 _mm_popcnt_u64 +# endif +# define CV_POPCNT_U32 _mm_popcnt_u32 +# else +# include +# if defined(__x86_64__) +# define CV_POPCNT_U64 __builtin_popcountll +# endif +# define CV_POPCNT_U32 __builtin_popcount +# endif +# define CV_POPCNT 1 +#endif +#ifdef CV_CPU_COMPILE_AVX +# include +# define CV_AVX 1 +#endif +#ifdef CV_CPU_COMPILE_FP16 +# if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64) +# include +# else +# include +# endif +# define CV_FP16 1 +#endif +#ifdef CV_CPU_COMPILE_AVX2 +# include +# define CV_AVX2 1 +#endif +#ifdef CV_CPU_COMPILE_AVX_512F +# include +# define CV_AVX_512F 1 +#endif +#ifdef CV_CPU_COMPILE_AVX512_COMMON +# define CV_AVX512_COMMON 1 +# define CV_AVX_512CD 1 +#endif +#ifdef CV_CPU_COMPILE_AVX512_KNL +# define CV_AVX512_KNL 1 +# define CV_AVX_512ER 1 +# define CV_AVX_512PF 1 +#endif +#ifdef CV_CPU_COMPILE_AVX512_KNM +# define CV_AVX512_KNM 1 +# define CV_AVX_5124FMAPS 1 +# define CV_AVX_5124VNNIW 1 +# define CV_AVX_512VPOPCNTDQ 1 +#endif +#ifdef CV_CPU_COMPILE_AVX512_SKX +# define CV_AVX512_SKX 1 +# define CV_AVX_512VL 1 +# define CV_AVX_512BW 1 +# define CV_AVX_512DQ 1 +#endif +#ifdef CV_CPU_COMPILE_AVX512_CNL +# define CV_AVX512_CNL 1 +# define CV_AVX_512IFMA 1 +# define CV_AVX_512VBMI 1 +#endif +#ifdef CV_CPU_COMPILE_AVX512_CLX +# define CV_AVX512_CLX 1 +# define CV_AVX_512VNNI 1 +#endif +#ifdef CV_CPU_COMPILE_AVX512_ICL +# define CV_AVX512_ICL 1 +# undef CV_AVX_512IFMA +# define CV_AVX_512IFMA 1 +# undef CV_AVX_512VBMI +# define CV_AVX_512VBMI 1 +# undef CV_AVX_512VNNI +# define CV_AVX_512VNNI 1 +# define CV_AVX_512VBMI2 1 +# define CV_AVX_512BITALG 1 +# define CV_AVX_512VPOPCNTDQ 1 +#endif +#ifdef CV_CPU_COMPILE_FMA3 +# define CV_FMA3 1 +#endif + +#if defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64)) && (defined(CV_CPU_COMPILE_NEON) || !defined(_MSC_VER)) +# include +# include +# define CV_NEON 1 +#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__)) +# include +# define CV_NEON 1 +#endif + +#if defined(__ARM_NEON__) || defined(__aarch64__) +# include +#endif + +#ifdef CV_CPU_COMPILE_VSX +# include +# undef vector +# undef pixel +# undef bool +# define CV_VSX 1 +#endif + +#ifdef CV_CPU_COMPILE_VSX3 +# define CV_VSX3 1 +#endif + +#ifdef CV_CPU_COMPILE_MSA +# include "hal/msa_macros.h" +# define CV_MSA 1 +#endif + +#ifdef __EMSCRIPTEN__ +# define CV_WASM_SIMD 1 +# include +#endif + +#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__ + +#if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX +struct VZeroUpperGuard { +#ifdef __GNUC__ + __attribute__((always_inline)) +#endif + inline VZeroUpperGuard() { _mm256_zeroupper(); } +#ifdef __GNUC__ + __attribute__((always_inline)) +#endif + inline ~VZeroUpperGuard() { _mm256_zeroupper(); } +}; +#define __CV_AVX_GUARD VZeroUpperGuard __vzeroupper_guard; CV_UNUSED(__vzeroupper_guard); +#endif + +#ifdef __CV_AVX_GUARD +#define CV_AVX_GUARD __CV_AVX_GUARD +#else +#define CV_AVX_GUARD +#endif + +#endif // __OPENCV_BUILD + + + +#if !defined __OPENCV_BUILD /* Compatibility code */ \ + && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */ +#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2) +# include +# define CV_MMX 1 +# define CV_SSE 1 +# define CV_SSE2 1 +#elif defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64)) && (defined(CV_CPU_COMPILE_NEON) || !defined(_MSC_VER)) +# include +# include +# define CV_NEON 1 +#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__)) +# include +# define CV_NEON 1 +#elif defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__) +# include +# undef vector +# undef pixel +# undef bool +# define CV_VSX 1 +#endif + +#endif // !__OPENCV_BUILD && !__CUDACC (Compatibility code) + + + +#ifndef CV_MMX +# define CV_MMX 0 +#endif +#ifndef CV_SSE +# define CV_SSE 0 +#endif +#ifndef CV_SSE2 +# define CV_SSE2 0 +#endif +#ifndef CV_SSE3 +# define CV_SSE3 0 +#endif +#ifndef CV_SSSE3 +# define CV_SSSE3 0 +#endif +#ifndef CV_SSE4_1 +# define CV_SSE4_1 0 +#endif +#ifndef CV_SSE4_2 +# define CV_SSE4_2 0 +#endif +#ifndef CV_POPCNT +# define CV_POPCNT 0 +#endif +#ifndef CV_AVX +# define CV_AVX 0 +#endif +#ifndef CV_FP16 +# define CV_FP16 0 +#endif +#ifndef CV_AVX2 +# define CV_AVX2 0 +#endif +#ifndef CV_FMA3 +# define CV_FMA3 0 +#endif +#ifndef CV_AVX_512F +# define CV_AVX_512F 0 +#endif +#ifndef CV_AVX_512BW +# define CV_AVX_512BW 0 +#endif +#ifndef CV_AVX_512CD +# define CV_AVX_512CD 0 +#endif +#ifndef CV_AVX_512DQ +# define CV_AVX_512DQ 0 +#endif +#ifndef CV_AVX_512ER +# define CV_AVX_512ER 0 +#endif +#ifndef CV_AVX_512IFMA +# define CV_AVX_512IFMA 0 +#endif +#define CV_AVX_512IFMA512 CV_AVX_512IFMA // deprecated +#ifndef CV_AVX_512PF +# define CV_AVX_512PF 0 +#endif +#ifndef CV_AVX_512VBMI +# define CV_AVX_512VBMI 0 +#endif +#ifndef CV_AVX_512VL +# define CV_AVX_512VL 0 +#endif +#ifndef CV_AVX_5124FMAPS +# define CV_AVX_5124FMAPS 0 +#endif +#ifndef CV_AVX_5124VNNIW +# define CV_AVX_5124VNNIW 0 +#endif +#ifndef CV_AVX_512VPOPCNTDQ +# define CV_AVX_512VPOPCNTDQ 0 +#endif +#ifndef CV_AVX_512VNNI +# define CV_AVX_512VNNI 0 +#endif +#ifndef CV_AVX_512VBMI2 +# define CV_AVX_512VBMI2 0 +#endif +#ifndef CV_AVX_512BITALG +# define CV_AVX_512BITALG 0 +#endif +#ifndef CV_AVX512_COMMON +# define CV_AVX512_COMMON 0 +#endif +#ifndef CV_AVX512_KNL +# define CV_AVX512_KNL 0 +#endif +#ifndef CV_AVX512_KNM +# define CV_AVX512_KNM 0 +#endif +#ifndef CV_AVX512_SKX +# define CV_AVX512_SKX 0 +#endif +#ifndef CV_AVX512_CNL +# define CV_AVX512_CNL 0 +#endif +#ifndef CV_AVX512_CLX +# define CV_AVX512_CLX 0 +#endif +#ifndef CV_AVX512_ICL +# define CV_AVX512_ICL 0 +#endif + +#ifndef CV_NEON +# define CV_NEON 0 +#endif + +#ifndef CV_VSX +# define CV_VSX 0 +#endif + +#ifndef CV_VSX3 +# define CV_VSX3 0 +#endif + +#ifndef CV_MSA +# define CV_MSA 0 +#endif + +#ifndef CV_WASM_SIMD +# define CV_WASM_SIMD 0 +#endif diff --git a/IPL/include/opencv/opencv2/core/cv_cpu_helper.h b/IPL/include/opencv/opencv2/core/cv_cpu_helper.h new file mode 100644 index 0000000..aaa89ed --- /dev/null +++ b/IPL/include/opencv/opencv2/core/cv_cpu_helper.h @@ -0,0 +1,487 @@ +// AUTOGENERATED, DO NOT EDIT + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE +# define CV_TRY_SSE 1 +# define CV_CPU_FORCE_SSE 1 +# define CV_CPU_HAS_SUPPORT_SSE 1 +# define CV_CPU_CALL_SSE(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_SSE_(fn, args) return (opt_SSE::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE +# define CV_TRY_SSE 1 +# define CV_CPU_FORCE_SSE 0 +# define CV_CPU_HAS_SUPPORT_SSE (cv::checkHardwareSupport(CV_CPU_SSE)) +# define CV_CPU_CALL_SSE(fn, args) if (CV_CPU_HAS_SUPPORT_SSE) return (opt_SSE::fn args) +# define CV_CPU_CALL_SSE_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE) return (opt_SSE::fn args) +#else +# define CV_TRY_SSE 0 +# define CV_CPU_FORCE_SSE 0 +# define CV_CPU_HAS_SUPPORT_SSE 0 +# define CV_CPU_CALL_SSE(fn, args) +# define CV_CPU_CALL_SSE_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_SSE(fn, args, mode, ...) CV_CPU_CALL_SSE(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE2 +# define CV_TRY_SSE2 1 +# define CV_CPU_FORCE_SSE2 1 +# define CV_CPU_HAS_SUPPORT_SSE2 1 +# define CV_CPU_CALL_SSE2(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_SSE2_(fn, args) return (opt_SSE2::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE2 +# define CV_TRY_SSE2 1 +# define CV_CPU_FORCE_SSE2 0 +# define CV_CPU_HAS_SUPPORT_SSE2 (cv::checkHardwareSupport(CV_CPU_SSE2)) +# define CV_CPU_CALL_SSE2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE2) return (opt_SSE2::fn args) +# define CV_CPU_CALL_SSE2_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE2) return (opt_SSE2::fn args) +#else +# define CV_TRY_SSE2 0 +# define CV_CPU_FORCE_SSE2 0 +# define CV_CPU_HAS_SUPPORT_SSE2 0 +# define CV_CPU_CALL_SSE2(fn, args) +# define CV_CPU_CALL_SSE2_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_SSE2(fn, args, mode, ...) CV_CPU_CALL_SSE2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE3 +# define CV_TRY_SSE3 1 +# define CV_CPU_FORCE_SSE3 1 +# define CV_CPU_HAS_SUPPORT_SSE3 1 +# define CV_CPU_CALL_SSE3(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_SSE3_(fn, args) return (opt_SSE3::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE3 +# define CV_TRY_SSE3 1 +# define CV_CPU_FORCE_SSE3 0 +# define CV_CPU_HAS_SUPPORT_SSE3 (cv::checkHardwareSupport(CV_CPU_SSE3)) +# define CV_CPU_CALL_SSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSE3) return (opt_SSE3::fn args) +# define CV_CPU_CALL_SSE3_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE3) return (opt_SSE3::fn args) +#else +# define CV_TRY_SSE3 0 +# define CV_CPU_FORCE_SSE3 0 +# define CV_CPU_HAS_SUPPORT_SSE3 0 +# define CV_CPU_CALL_SSE3(fn, args) +# define CV_CPU_CALL_SSE3_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_SSE3(fn, args, mode, ...) CV_CPU_CALL_SSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSSE3 +# define CV_TRY_SSSE3 1 +# define CV_CPU_FORCE_SSSE3 1 +# define CV_CPU_HAS_SUPPORT_SSSE3 1 +# define CV_CPU_CALL_SSSE3(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_SSSE3_(fn, args) return (opt_SSSE3::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSSE3 +# define CV_TRY_SSSE3 1 +# define CV_CPU_FORCE_SSSE3 0 +# define CV_CPU_HAS_SUPPORT_SSSE3 (cv::checkHardwareSupport(CV_CPU_SSSE3)) +# define CV_CPU_CALL_SSSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSSE3) return (opt_SSSE3::fn args) +# define CV_CPU_CALL_SSSE3_(fn, args) if (CV_CPU_HAS_SUPPORT_SSSE3) return (opt_SSSE3::fn args) +#else +# define CV_TRY_SSSE3 0 +# define CV_CPU_FORCE_SSSE3 0 +# define CV_CPU_HAS_SUPPORT_SSSE3 0 +# define CV_CPU_CALL_SSSE3(fn, args) +# define CV_CPU_CALL_SSSE3_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_SSSE3(fn, args, mode, ...) CV_CPU_CALL_SSSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_1 +# define CV_TRY_SSE4_1 1 +# define CV_CPU_FORCE_SSE4_1 1 +# define CV_CPU_HAS_SUPPORT_SSE4_1 1 +# define CV_CPU_CALL_SSE4_1(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_SSE4_1_(fn, args) return (opt_SSE4_1::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_1 +# define CV_TRY_SSE4_1 1 +# define CV_CPU_FORCE_SSE4_1 0 +# define CV_CPU_HAS_SUPPORT_SSE4_1 (cv::checkHardwareSupport(CV_CPU_SSE4_1)) +# define CV_CPU_CALL_SSE4_1(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_1) return (opt_SSE4_1::fn args) +# define CV_CPU_CALL_SSE4_1_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_1) return (opt_SSE4_1::fn args) +#else +# define CV_TRY_SSE4_1 0 +# define CV_CPU_FORCE_SSE4_1 0 +# define CV_CPU_HAS_SUPPORT_SSE4_1 0 +# define CV_CPU_CALL_SSE4_1(fn, args) +# define CV_CPU_CALL_SSE4_1_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_SSE4_1(fn, args, mode, ...) CV_CPU_CALL_SSE4_1(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_2 +# define CV_TRY_SSE4_2 1 +# define CV_CPU_FORCE_SSE4_2 1 +# define CV_CPU_HAS_SUPPORT_SSE4_2 1 +# define CV_CPU_CALL_SSE4_2(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_SSE4_2_(fn, args) return (opt_SSE4_2::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_2 +# define CV_TRY_SSE4_2 1 +# define CV_CPU_FORCE_SSE4_2 0 +# define CV_CPU_HAS_SUPPORT_SSE4_2 (cv::checkHardwareSupport(CV_CPU_SSE4_2)) +# define CV_CPU_CALL_SSE4_2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_2) return (opt_SSE4_2::fn args) +# define CV_CPU_CALL_SSE4_2_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_2) return (opt_SSE4_2::fn args) +#else +# define CV_TRY_SSE4_2 0 +# define CV_CPU_FORCE_SSE4_2 0 +# define CV_CPU_HAS_SUPPORT_SSE4_2 0 +# define CV_CPU_CALL_SSE4_2(fn, args) +# define CV_CPU_CALL_SSE4_2_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_SSE4_2(fn, args, mode, ...) CV_CPU_CALL_SSE4_2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_POPCNT +# define CV_TRY_POPCNT 1 +# define CV_CPU_FORCE_POPCNT 1 +# define CV_CPU_HAS_SUPPORT_POPCNT 1 +# define CV_CPU_CALL_POPCNT(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_POPCNT_(fn, args) return (opt_POPCNT::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_POPCNT +# define CV_TRY_POPCNT 1 +# define CV_CPU_FORCE_POPCNT 0 +# define CV_CPU_HAS_SUPPORT_POPCNT (cv::checkHardwareSupport(CV_CPU_POPCNT)) +# define CV_CPU_CALL_POPCNT(fn, args) if (CV_CPU_HAS_SUPPORT_POPCNT) return (opt_POPCNT::fn args) +# define CV_CPU_CALL_POPCNT_(fn, args) if (CV_CPU_HAS_SUPPORT_POPCNT) return (opt_POPCNT::fn args) +#else +# define CV_TRY_POPCNT 0 +# define CV_CPU_FORCE_POPCNT 0 +# define CV_CPU_HAS_SUPPORT_POPCNT 0 +# define CV_CPU_CALL_POPCNT(fn, args) +# define CV_CPU_CALL_POPCNT_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_POPCNT(fn, args, mode, ...) CV_CPU_CALL_POPCNT(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX +# define CV_TRY_AVX 1 +# define CV_CPU_FORCE_AVX 1 +# define CV_CPU_HAS_SUPPORT_AVX 1 +# define CV_CPU_CALL_AVX(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX_(fn, args) return (opt_AVX::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX +# define CV_TRY_AVX 1 +# define CV_CPU_FORCE_AVX 0 +# define CV_CPU_HAS_SUPPORT_AVX (cv::checkHardwareSupport(CV_CPU_AVX)) +# define CV_CPU_CALL_AVX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX) return (opt_AVX::fn args) +# define CV_CPU_CALL_AVX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX) return (opt_AVX::fn args) +#else +# define CV_TRY_AVX 0 +# define CV_CPU_FORCE_AVX 0 +# define CV_CPU_HAS_SUPPORT_AVX 0 +# define CV_CPU_CALL_AVX(fn, args) +# define CV_CPU_CALL_AVX_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX(fn, args, mode, ...) CV_CPU_CALL_AVX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FP16 +# define CV_TRY_FP16 1 +# define CV_CPU_FORCE_FP16 1 +# define CV_CPU_HAS_SUPPORT_FP16 1 +# define CV_CPU_CALL_FP16(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_FP16_(fn, args) return (opt_FP16::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FP16 +# define CV_TRY_FP16 1 +# define CV_CPU_FORCE_FP16 0 +# define CV_CPU_HAS_SUPPORT_FP16 (cv::checkHardwareSupport(CV_CPU_FP16)) +# define CV_CPU_CALL_FP16(fn, args) if (CV_CPU_HAS_SUPPORT_FP16) return (opt_FP16::fn args) +# define CV_CPU_CALL_FP16_(fn, args) if (CV_CPU_HAS_SUPPORT_FP16) return (opt_FP16::fn args) +#else +# define CV_TRY_FP16 0 +# define CV_CPU_FORCE_FP16 0 +# define CV_CPU_HAS_SUPPORT_FP16 0 +# define CV_CPU_CALL_FP16(fn, args) +# define CV_CPU_CALL_FP16_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_FP16(fn, args, mode, ...) CV_CPU_CALL_FP16(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX2 +# define CV_TRY_AVX2 1 +# define CV_CPU_FORCE_AVX2 1 +# define CV_CPU_HAS_SUPPORT_AVX2 1 +# define CV_CPU_CALL_AVX2(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX2_(fn, args) return (opt_AVX2::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX2 +# define CV_TRY_AVX2 1 +# define CV_CPU_FORCE_AVX2 0 +# define CV_CPU_HAS_SUPPORT_AVX2 (cv::checkHardwareSupport(CV_CPU_AVX2)) +# define CV_CPU_CALL_AVX2(fn, args) if (CV_CPU_HAS_SUPPORT_AVX2) return (opt_AVX2::fn args) +# define CV_CPU_CALL_AVX2_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX2) return (opt_AVX2::fn args) +#else +# define CV_TRY_AVX2 0 +# define CV_CPU_FORCE_AVX2 0 +# define CV_CPU_HAS_SUPPORT_AVX2 0 +# define CV_CPU_CALL_AVX2(fn, args) +# define CV_CPU_CALL_AVX2_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX2(fn, args, mode, ...) CV_CPU_CALL_AVX2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FMA3 +# define CV_TRY_FMA3 1 +# define CV_CPU_FORCE_FMA3 1 +# define CV_CPU_HAS_SUPPORT_FMA3 1 +# define CV_CPU_CALL_FMA3(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_FMA3_(fn, args) return (opt_FMA3::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FMA3 +# define CV_TRY_FMA3 1 +# define CV_CPU_FORCE_FMA3 0 +# define CV_CPU_HAS_SUPPORT_FMA3 (cv::checkHardwareSupport(CV_CPU_FMA3)) +# define CV_CPU_CALL_FMA3(fn, args) if (CV_CPU_HAS_SUPPORT_FMA3) return (opt_FMA3::fn args) +# define CV_CPU_CALL_FMA3_(fn, args) if (CV_CPU_HAS_SUPPORT_FMA3) return (opt_FMA3::fn args) +#else +# define CV_TRY_FMA3 0 +# define CV_CPU_FORCE_FMA3 0 +# define CV_CPU_HAS_SUPPORT_FMA3 0 +# define CV_CPU_CALL_FMA3(fn, args) +# define CV_CPU_CALL_FMA3_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_FMA3(fn, args, mode, ...) CV_CPU_CALL_FMA3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX_512F +# define CV_TRY_AVX_512F 1 +# define CV_CPU_FORCE_AVX_512F 1 +# define CV_CPU_HAS_SUPPORT_AVX_512F 1 +# define CV_CPU_CALL_AVX_512F(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX_512F_(fn, args) return (opt_AVX_512F::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX_512F +# define CV_TRY_AVX_512F 1 +# define CV_CPU_FORCE_AVX_512F 0 +# define CV_CPU_HAS_SUPPORT_AVX_512F (cv::checkHardwareSupport(CV_CPU_AVX_512F)) +# define CV_CPU_CALL_AVX_512F(fn, args) if (CV_CPU_HAS_SUPPORT_AVX_512F) return (opt_AVX_512F::fn args) +# define CV_CPU_CALL_AVX_512F_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX_512F) return (opt_AVX_512F::fn args) +#else +# define CV_TRY_AVX_512F 0 +# define CV_CPU_FORCE_AVX_512F 0 +# define CV_CPU_HAS_SUPPORT_AVX_512F 0 +# define CV_CPU_CALL_AVX_512F(fn, args) +# define CV_CPU_CALL_AVX_512F_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX_512F(fn, args, mode, ...) CV_CPU_CALL_AVX_512F(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_COMMON +# define CV_TRY_AVX512_COMMON 1 +# define CV_CPU_FORCE_AVX512_COMMON 1 +# define CV_CPU_HAS_SUPPORT_AVX512_COMMON 1 +# define CV_CPU_CALL_AVX512_COMMON(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_COMMON_(fn, args) return (opt_AVX512_COMMON::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_COMMON +# define CV_TRY_AVX512_COMMON 1 +# define CV_CPU_FORCE_AVX512_COMMON 0 +# define CV_CPU_HAS_SUPPORT_AVX512_COMMON (cv::checkHardwareSupport(CV_CPU_AVX512_COMMON)) +# define CV_CPU_CALL_AVX512_COMMON(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_COMMON) return (opt_AVX512_COMMON::fn args) +# define CV_CPU_CALL_AVX512_COMMON_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_COMMON) return (opt_AVX512_COMMON::fn args) +#else +# define CV_TRY_AVX512_COMMON 0 +# define CV_CPU_FORCE_AVX512_COMMON 0 +# define CV_CPU_HAS_SUPPORT_AVX512_COMMON 0 +# define CV_CPU_CALL_AVX512_COMMON(fn, args) +# define CV_CPU_CALL_AVX512_COMMON_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_COMMON(fn, args, mode, ...) CV_CPU_CALL_AVX512_COMMON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_KNL +# define CV_TRY_AVX512_KNL 1 +# define CV_CPU_FORCE_AVX512_KNL 1 +# define CV_CPU_HAS_SUPPORT_AVX512_KNL 1 +# define CV_CPU_CALL_AVX512_KNL(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_KNL_(fn, args) return (opt_AVX512_KNL::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_KNL +# define CV_TRY_AVX512_KNL 1 +# define CV_CPU_FORCE_AVX512_KNL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_KNL (cv::checkHardwareSupport(CV_CPU_AVX512_KNL)) +# define CV_CPU_CALL_AVX512_KNL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNL) return (opt_AVX512_KNL::fn args) +# define CV_CPU_CALL_AVX512_KNL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNL) return (opt_AVX512_KNL::fn args) +#else +# define CV_TRY_AVX512_KNL 0 +# define CV_CPU_FORCE_AVX512_KNL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_KNL 0 +# define CV_CPU_CALL_AVX512_KNL(fn, args) +# define CV_CPU_CALL_AVX512_KNL_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_KNL(fn, args, mode, ...) CV_CPU_CALL_AVX512_KNL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_KNM +# define CV_TRY_AVX512_KNM 1 +# define CV_CPU_FORCE_AVX512_KNM 1 +# define CV_CPU_HAS_SUPPORT_AVX512_KNM 1 +# define CV_CPU_CALL_AVX512_KNM(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_KNM_(fn, args) return (opt_AVX512_KNM::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_KNM +# define CV_TRY_AVX512_KNM 1 +# define CV_CPU_FORCE_AVX512_KNM 0 +# define CV_CPU_HAS_SUPPORT_AVX512_KNM (cv::checkHardwareSupport(CV_CPU_AVX512_KNM)) +# define CV_CPU_CALL_AVX512_KNM(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNM) return (opt_AVX512_KNM::fn args) +# define CV_CPU_CALL_AVX512_KNM_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNM) return (opt_AVX512_KNM::fn args) +#else +# define CV_TRY_AVX512_KNM 0 +# define CV_CPU_FORCE_AVX512_KNM 0 +# define CV_CPU_HAS_SUPPORT_AVX512_KNM 0 +# define CV_CPU_CALL_AVX512_KNM(fn, args) +# define CV_CPU_CALL_AVX512_KNM_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_KNM(fn, args, mode, ...) CV_CPU_CALL_AVX512_KNM(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_SKX +# define CV_TRY_AVX512_SKX 1 +# define CV_CPU_FORCE_AVX512_SKX 1 +# define CV_CPU_HAS_SUPPORT_AVX512_SKX 1 +# define CV_CPU_CALL_AVX512_SKX(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_SKX_(fn, args) return (opt_AVX512_SKX::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_SKX +# define CV_TRY_AVX512_SKX 1 +# define CV_CPU_FORCE_AVX512_SKX 0 +# define CV_CPU_HAS_SUPPORT_AVX512_SKX (cv::checkHardwareSupport(CV_CPU_AVX512_SKX)) +# define CV_CPU_CALL_AVX512_SKX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_SKX) return (opt_AVX512_SKX::fn args) +# define CV_CPU_CALL_AVX512_SKX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_SKX) return (opt_AVX512_SKX::fn args) +#else +# define CV_TRY_AVX512_SKX 0 +# define CV_CPU_FORCE_AVX512_SKX 0 +# define CV_CPU_HAS_SUPPORT_AVX512_SKX 0 +# define CV_CPU_CALL_AVX512_SKX(fn, args) +# define CV_CPU_CALL_AVX512_SKX_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_SKX(fn, args, mode, ...) CV_CPU_CALL_AVX512_SKX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_CNL +# define CV_TRY_AVX512_CNL 1 +# define CV_CPU_FORCE_AVX512_CNL 1 +# define CV_CPU_HAS_SUPPORT_AVX512_CNL 1 +# define CV_CPU_CALL_AVX512_CNL(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_CNL_(fn, args) return (opt_AVX512_CNL::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_CNL +# define CV_TRY_AVX512_CNL 1 +# define CV_CPU_FORCE_AVX512_CNL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_CNL (cv::checkHardwareSupport(CV_CPU_AVX512_CNL)) +# define CV_CPU_CALL_AVX512_CNL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CNL) return (opt_AVX512_CNL::fn args) +# define CV_CPU_CALL_AVX512_CNL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CNL) return (opt_AVX512_CNL::fn args) +#else +# define CV_TRY_AVX512_CNL 0 +# define CV_CPU_FORCE_AVX512_CNL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_CNL 0 +# define CV_CPU_CALL_AVX512_CNL(fn, args) +# define CV_CPU_CALL_AVX512_CNL_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_CNL(fn, args, mode, ...) CV_CPU_CALL_AVX512_CNL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_CLX +# define CV_TRY_AVX512_CLX 1 +# define CV_CPU_FORCE_AVX512_CLX 1 +# define CV_CPU_HAS_SUPPORT_AVX512_CLX 1 +# define CV_CPU_CALL_AVX512_CLX(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_CLX_(fn, args) return (opt_AVX512_CLX::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_CLX +# define CV_TRY_AVX512_CLX 1 +# define CV_CPU_FORCE_AVX512_CLX 0 +# define CV_CPU_HAS_SUPPORT_AVX512_CLX (cv::checkHardwareSupport(CV_CPU_AVX512_CLX)) +# define CV_CPU_CALL_AVX512_CLX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CLX) return (opt_AVX512_CLX::fn args) +# define CV_CPU_CALL_AVX512_CLX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CLX) return (opt_AVX512_CLX::fn args) +#else +# define CV_TRY_AVX512_CLX 0 +# define CV_CPU_FORCE_AVX512_CLX 0 +# define CV_CPU_HAS_SUPPORT_AVX512_CLX 0 +# define CV_CPU_CALL_AVX512_CLX(fn, args) +# define CV_CPU_CALL_AVX512_CLX_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_CLX(fn, args, mode, ...) CV_CPU_CALL_AVX512_CLX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_ICL +# define CV_TRY_AVX512_ICL 1 +# define CV_CPU_FORCE_AVX512_ICL 1 +# define CV_CPU_HAS_SUPPORT_AVX512_ICL 1 +# define CV_CPU_CALL_AVX512_ICL(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_ICL_(fn, args) return (opt_AVX512_ICL::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_ICL +# define CV_TRY_AVX512_ICL 1 +# define CV_CPU_FORCE_AVX512_ICL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_ICL (cv::checkHardwareSupport(CV_CPU_AVX512_ICL)) +# define CV_CPU_CALL_AVX512_ICL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_ICL) return (opt_AVX512_ICL::fn args) +# define CV_CPU_CALL_AVX512_ICL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_ICL) return (opt_AVX512_ICL::fn args) +#else +# define CV_TRY_AVX512_ICL 0 +# define CV_CPU_FORCE_AVX512_ICL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_ICL 0 +# define CV_CPU_CALL_AVX512_ICL(fn, args) +# define CV_CPU_CALL_AVX512_ICL_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_ICL(fn, args, mode, ...) CV_CPU_CALL_AVX512_ICL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON +# define CV_TRY_NEON 1 +# define CV_CPU_FORCE_NEON 1 +# define CV_CPU_HAS_SUPPORT_NEON 1 +# define CV_CPU_CALL_NEON(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_NEON_(fn, args) return (opt_NEON::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON +# define CV_TRY_NEON 1 +# define CV_CPU_FORCE_NEON 0 +# define CV_CPU_HAS_SUPPORT_NEON (cv::checkHardwareSupport(CV_CPU_NEON)) +# define CV_CPU_CALL_NEON(fn, args) if (CV_CPU_HAS_SUPPORT_NEON) return (opt_NEON::fn args) +# define CV_CPU_CALL_NEON_(fn, args) if (CV_CPU_HAS_SUPPORT_NEON) return (opt_NEON::fn args) +#else +# define CV_TRY_NEON 0 +# define CV_CPU_FORCE_NEON 0 +# define CV_CPU_HAS_SUPPORT_NEON 0 +# define CV_CPU_CALL_NEON(fn, args) +# define CV_CPU_CALL_NEON_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_NEON(fn, args, mode, ...) CV_CPU_CALL_NEON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_MSA +# define CV_TRY_MSA 1 +# define CV_CPU_FORCE_MSA 1 +# define CV_CPU_HAS_SUPPORT_MSA 1 +# define CV_CPU_CALL_MSA(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_MSA_(fn, args) return (opt_MSA::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_MSA +# define CV_TRY_MSA 1 +# define CV_CPU_FORCE_MSA 0 +# define CV_CPU_HAS_SUPPORT_MSA (cv::checkHardwareSupport(CV_CPU_MSA)) +# define CV_CPU_CALL_MSA(fn, args) if (CV_CPU_HAS_SUPPORT_MSA) return (opt_MSA::fn args) +# define CV_CPU_CALL_MSA_(fn, args) if (CV_CPU_HAS_SUPPORT_MSA) return (opt_MSA::fn args) +#else +# define CV_TRY_MSA 0 +# define CV_CPU_FORCE_MSA 0 +# define CV_CPU_HAS_SUPPORT_MSA 0 +# define CV_CPU_CALL_MSA(fn, args) +# define CV_CPU_CALL_MSA_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_MSA(fn, args, mode, ...) CV_CPU_CALL_MSA(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX +# define CV_TRY_VSX 1 +# define CV_CPU_FORCE_VSX 1 +# define CV_CPU_HAS_SUPPORT_VSX 1 +# define CV_CPU_CALL_VSX(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_VSX_(fn, args) return (opt_VSX::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX +# define CV_TRY_VSX 1 +# define CV_CPU_FORCE_VSX 0 +# define CV_CPU_HAS_SUPPORT_VSX (cv::checkHardwareSupport(CV_CPU_VSX)) +# define CV_CPU_CALL_VSX(fn, args) if (CV_CPU_HAS_SUPPORT_VSX) return (opt_VSX::fn args) +# define CV_CPU_CALL_VSX_(fn, args) if (CV_CPU_HAS_SUPPORT_VSX) return (opt_VSX::fn args) +#else +# define CV_TRY_VSX 0 +# define CV_CPU_FORCE_VSX 0 +# define CV_CPU_HAS_SUPPORT_VSX 0 +# define CV_CPU_CALL_VSX(fn, args) +# define CV_CPU_CALL_VSX_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_VSX(fn, args, mode, ...) CV_CPU_CALL_VSX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX3 +# define CV_TRY_VSX3 1 +# define CV_CPU_FORCE_VSX3 1 +# define CV_CPU_HAS_SUPPORT_VSX3 1 +# define CV_CPU_CALL_VSX3(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_VSX3_(fn, args) return (opt_VSX3::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX3 +# define CV_TRY_VSX3 1 +# define CV_CPU_FORCE_VSX3 0 +# define CV_CPU_HAS_SUPPORT_VSX3 (cv::checkHardwareSupport(CV_CPU_VSX3)) +# define CV_CPU_CALL_VSX3(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args) +# define CV_CPU_CALL_VSX3_(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args) +#else +# define CV_TRY_VSX3 0 +# define CV_CPU_FORCE_VSX3 0 +# define CV_CPU_HAS_SUPPORT_VSX3 0 +# define CV_CPU_CALL_VSX3(fn, args) +# define CV_CPU_CALL_VSX3_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_VSX3(fn, args, mode, ...) CV_CPU_CALL_VSX3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args) +#define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...) CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */ diff --git a/IPL/include/opencv/opencv2/core/cvdef.h b/IPL/include/opencv/opencv2/core/cvdef.h index af2abfb..e66a646 100644 --- a/IPL/include/opencv/opencv2/core/cvdef.h +++ b/IPL/include/opencv/opencv2/core/cvdef.h @@ -42,16 +42,124 @@ // //M*/ -#ifndef __OPENCV_CORE_CVDEF_H__ -#define __OPENCV_CORE_CVDEF_H__ +#ifndef OPENCV_CORE_CVDEF_H +#define OPENCV_CORE_CVDEF_H //! @addtogroup core_utils //! @{ -#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300 -# define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */ +#ifdef OPENCV_INCLUDE_PORT_FILE // User-provided header file with custom platform configuration +#include OPENCV_INCLUDE_PORT_FILE #endif +#if !defined CV_DOXYGEN && !defined CV_IGNORE_DEBUG_BUILD_GUARD +#if (defined(_MSC_VER) && (defined(DEBUG) || defined(_DEBUG))) || \ + (defined(_GLIBCXX_DEBUG) || defined(_GLIBCXX_DEBUG_PEDANTIC)) +// Guard to prevent using of binary incompatible binaries / runtimes +// https://github.com/opencv/opencv/pull/9161 +#define CV__DEBUG_NS_BEGIN namespace debug_build_guard { +#define CV__DEBUG_NS_END } +namespace cv { namespace debug_build_guard { } using namespace debug_build_guard; } +#endif +#endif + +#ifndef CV__DEBUG_NS_BEGIN +#define CV__DEBUG_NS_BEGIN +#define CV__DEBUG_NS_END +#endif + + +#ifdef __OPENCV_BUILD +#include "cvconfig.h" +#endif + +#ifndef __CV_EXPAND +#define __CV_EXPAND(x) x +#endif + +#ifndef __CV_CAT +#define __CV_CAT__(x, y) x ## y +#define __CV_CAT_(x, y) __CV_CAT__(x, y) +#define __CV_CAT(x, y) __CV_CAT_(x, y) +#endif + +#define __CV_VA_NUM_ARGS_HELPER(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, N, ...) N +#define __CV_VA_NUM_ARGS(...) __CV_EXPAND(__CV_VA_NUM_ARGS_HELPER(__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)) + +#if defined __GNUC__ +#define CV_Func __func__ +#elif defined _MSC_VER +#define CV_Func __FUNCTION__ +#else +#define CV_Func "" +#endif + +//! @cond IGNORED + +//////////////// static assert ///////////////// +#define CVAUX_CONCAT_EXP(a, b) a##b +#define CVAUX_CONCAT(a, b) CVAUX_CONCAT_EXP(a,b) + +#if defined(__clang__) +# ifndef __has_extension +# define __has_extension __has_feature /* compatibility, for older versions of clang */ +# endif +# if __has_extension(cxx_static_assert) +# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition) +# elif __has_extension(c_static_assert) +# define CV_StaticAssert(condition, reason) _Static_assert((condition), reason " " #condition) +# endif +#elif defined(__GNUC__) +# if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) +# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition) +# endif +#elif defined(_MSC_VER) +# if _MSC_VER >= 1600 /* MSVC 10 */ +# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition) +# endif +#endif +#ifndef CV_StaticAssert +# if !defined(__clang__) && defined(__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 302) +# define CV_StaticAssert(condition, reason) ({ extern int __attribute__((error("CV_StaticAssert: " reason " " #condition))) CV_StaticAssert(); ((condition) ? 0 : CV_StaticAssert()); }) +# else +namespace cv { + template struct CV_StaticAssert_failed; + template <> struct CV_StaticAssert_failed { enum { val = 1 }; }; + template struct CV_StaticAssert_test {}; +} +# define CV_StaticAssert(condition, reason)\ + typedef cv::CV_StaticAssert_test< sizeof(cv::CV_StaticAssert_failed< static_cast(condition) >) > CVAUX_CONCAT(CV_StaticAssert_failed_at_, __LINE__) +# endif +#endif + +// Suppress warning "-Wdeprecated-declarations" / C4996 +#if defined(_MSC_VER) + #define CV_DO_PRAGMA(x) __pragma(x) +#elif defined(__GNUC__) + #define CV_DO_PRAGMA(x) _Pragma (#x) +#else + #define CV_DO_PRAGMA(x) +#endif + +#ifdef _MSC_VER +#define CV_SUPPRESS_DEPRECATED_START \ + CV_DO_PRAGMA(warning(push)) \ + CV_DO_PRAGMA(warning(disable: 4996)) +#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(warning(pop)) +#elif defined (__clang__) || ((__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 405)) +#define CV_SUPPRESS_DEPRECATED_START \ + CV_DO_PRAGMA(GCC diagnostic push) \ + CV_DO_PRAGMA(GCC diagnostic ignored "-Wdeprecated-declarations") +#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(GCC diagnostic pop) +#else +#define CV_SUPPRESS_DEPRECATED_START +#define CV_SUPPRESS_DEPRECATED_END +#endif + +#define CV_UNUSED(name) (void)name + +//! @endcond + // undef problematic defines sometimes defined by system headers (windows.h in particular) #undef small #undef min @@ -59,11 +167,12 @@ #undef abs #undef Complex -#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300 -# define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */ +#if defined __cplusplus +#include +#else +#include #endif -#include #include "opencv2/core/hal/interface.h" #if defined __ICL @@ -88,7 +197,17 @@ # endif #endif -#if defined CV_ICC && !defined CV_ENABLE_UNROLLED +#ifndef CV_ALWAYS_INLINE +#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) +#define CV_ALWAYS_INLINE inline __attribute__((always_inline)) +#elif defined(_MSC_VER) +#define CV_ALWAYS_INLINE __forceinline +#else +#define CV_ALWAYS_INLINE inline +#endif +#endif + +#if defined CV_DISABLE_OPTIMIZATION || (defined CV_ICC && !defined CV_ENABLE_UNROLLED) # define CV_ENABLE_UNROLLED 0 #else # define CV_ENABLE_UNROLLED 1 @@ -112,7 +231,7 @@ #define CV_CPU_SSE4_1 6 #define CV_CPU_SSE4_2 7 #define CV_CPU_POPCNT 8 - +#define CV_CPU_FP16 9 #define CV_CPU_AVX 10 #define CV_CPU_AVX2 11 #define CV_CPU_FMA3 12 @@ -122,15 +241,36 @@ #define CV_CPU_AVX_512CD 15 #define CV_CPU_AVX_512DQ 16 #define CV_CPU_AVX_512ER 17 -#define CV_CPU_AVX_512IFMA512 18 +#define CV_CPU_AVX_512IFMA512 18 // deprecated +#define CV_CPU_AVX_512IFMA 18 #define CV_CPU_AVX_512PF 19 #define CV_CPU_AVX_512VBMI 20 #define CV_CPU_AVX_512VL 21 +#define CV_CPU_AVX_512VBMI2 22 +#define CV_CPU_AVX_512VNNI 23 +#define CV_CPU_AVX_512BITALG 24 +#define CV_CPU_AVX_512VPOPCNTDQ 25 +#define CV_CPU_AVX_5124VNNIW 26 +#define CV_CPU_AVX_5124FMAPS 27 + +#define CV_CPU_NEON 100 -#define CV_CPU_NEON 100 +#define CV_CPU_MSA 150 + +#define CV_CPU_VSX 200 +#define CV_CPU_VSX3 201 + +// CPU features groups +#define CV_CPU_AVX512_SKX 256 +#define CV_CPU_AVX512_COMMON 257 +#define CV_CPU_AVX512_KNL 258 +#define CV_CPU_AVX512_KNM 259 +#define CV_CPU_AVX512_CNL 260 +#define CV_CPU_AVX512_CLX 261 +#define CV_CPU_AVX512_ICL 262 // when adding to this list remember to update the following enum -#define CV_HARDWARE_MAX_FEATURE 255 +#define CV_HARDWARE_MAX_FEATURE 512 /** @brief Available CPU features. */ @@ -143,7 +283,7 @@ enum CpuFeatures { CPU_SSE4_1 = 6, CPU_SSE4_2 = 7, CPU_POPCNT = 8, - + CPU_FP16 = 9, CPU_AVX = 10, CPU_AVX2 = 11, CPU_FMA3 = 12, @@ -153,156 +293,69 @@ enum CpuFeatures { CPU_AVX_512CD = 15, CPU_AVX_512DQ = 16, CPU_AVX_512ER = 17, - CPU_AVX_512IFMA512 = 18, + CPU_AVX_512IFMA512 = 18, // deprecated + CPU_AVX_512IFMA = 18, CPU_AVX_512PF = 19, CPU_AVX_512VBMI = 20, CPU_AVX_512VL = 21, + CPU_AVX_512VBMI2 = 22, + CPU_AVX_512VNNI = 23, + CPU_AVX_512BITALG = 24, + CPU_AVX_512VPOPCNTDQ= 25, + CPU_AVX_5124VNNIW = 26, + CPU_AVX_5124FMAPS = 27, - CPU_NEON = 100 -}; + CPU_NEON = 100, -// do not include SSE/AVX/NEON headers for NVCC compiler -#ifndef __CUDACC__ - -#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2) -# include -# define CV_MMX 1 -# define CV_SSE 1 -# define CV_SSE2 1 -# if defined __SSE3__ || (defined _MSC_VER && _MSC_VER >= 1500) -# include -# define CV_SSE3 1 -# endif -# if defined __SSSE3__ || (defined _MSC_VER && _MSC_VER >= 1500) -# include -# define CV_SSSE3 1 -# endif -# if defined __SSE4_1__ || (defined _MSC_VER && _MSC_VER >= 1500) -# include -# define CV_SSE4_1 1 -# endif -# if defined __SSE4_2__ || (defined _MSC_VER && _MSC_VER >= 1500) -# include -# define CV_SSE4_2 1 -# endif -# if defined __POPCNT__ || (defined _MSC_VER && _MSC_VER >= 1500) -# ifdef _MSC_VER -# include -# else -# include -# endif -# define CV_POPCNT 1 -# endif -# if defined __AVX__ || (defined _MSC_VER && _MSC_VER >= 1600 && 0) -// MS Visual Studio 2010 (2012?) has no macro pre-defined to identify the use of /arch:AVX -// See: http://connect.microsoft.com/VisualStudio/feedback/details/605858/arch-avx-should-define-a-predefined-macro-in-x64-and-set-a-unique-value-for-m-ix86-fp-in-win32 -# include -# define CV_AVX 1 -# if defined(_XCR_XFEATURE_ENABLED_MASK) -# define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK) -# else -# define __xgetbv() 0 -# endif -# endif -# if defined __AVX2__ || (defined _MSC_VER && _MSC_VER >= 1800 && 0) -# include -# define CV_AVX2 1 -# if defined __FMA__ -# define CV_FMA3 1 -# endif -# endif -#endif + CPU_MSA = 150, -#if (defined WIN32 || defined _WIN32) && defined(_M_ARM) -# include -# include "arm_neon.h" -# define CV_NEON 1 -# define CPU_HAS_NEON_FEATURE (true) -#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__)) -# include -# define CV_NEON 1 -#endif + CPU_VSX = 200, + CPU_VSX3 = 201, -#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ -# define CV_VFP 1 -#endif + CPU_AVX512_SKX = 256, //!< Skylake-X with AVX-512F/CD/BW/DQ/VL + CPU_AVX512_COMMON = 257, //!< Common instructions AVX-512F/CD for all CPUs that support AVX-512 + CPU_AVX512_KNL = 258, //!< Knights Landing with AVX-512F/CD/ER/PF + CPU_AVX512_KNM = 259, //!< Knights Mill with AVX-512F/CD/ER/PF/4FMAPS/4VNNIW/VPOPCNTDQ + CPU_AVX512_CNL = 260, //!< Cannon Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI + CPU_AVX512_CLX = 261, //!< Cascade Lake with AVX-512F/CD/BW/DQ/VL/VNNI + CPU_AVX512_ICL = 262, //!< Ice Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI/VNNI/VBMI2/BITALG/VPOPCNTDQ -#endif // __CUDACC__ + CPU_MAX_FEATURE = 512 // see CV_HARDWARE_MAX_FEATURE +}; -#ifndef CV_POPCNT -#define CV_POPCNT 0 -#endif -#ifndef CV_MMX -# define CV_MMX 0 -#endif -#ifndef CV_SSE -# define CV_SSE 0 -#endif -#ifndef CV_SSE2 -# define CV_SSE2 0 -#endif -#ifndef CV_SSE3 -# define CV_SSE3 0 -#endif -#ifndef CV_SSSE3 -# define CV_SSSE3 0 -#endif -#ifndef CV_SSE4_1 -# define CV_SSE4_1 0 -#endif -#ifndef CV_SSE4_2 -# define CV_SSE4_2 0 -#endif -#ifndef CV_AVX -# define CV_AVX 0 -#endif -#ifndef CV_AVX2 -# define CV_AVX2 0 -#endif -#ifndef CV_FMA3 -# define CV_FMA3 0 -#endif -#ifndef CV_AVX_512F -# define CV_AVX_512F 0 -#endif -#ifndef CV_AVX_512BW -# define CV_AVX_512BW 0 -#endif -#ifndef CV_AVX_512CD -# define CV_AVX_512CD 0 -#endif -#ifndef CV_AVX_512DQ -# define CV_AVX_512DQ 0 -#endif -#ifndef CV_AVX_512ER -# define CV_AVX_512ER 0 -#endif -#ifndef CV_AVX_512IFMA512 -# define CV_AVX_512IFMA512 0 -#endif -#ifndef CV_AVX_512PF -# define CV_AVX_512PF 0 -#endif -#ifndef CV_AVX_512VBMI -# define CV_AVX_512VBMI 0 -#endif -#ifndef CV_AVX_512VL -# define CV_AVX_512VL 0 -#endif -#ifndef CV_NEON -# define CV_NEON 0 -#endif +#include "cv_cpu_dispatch.h" -#ifndef CV_VFP -# define CV_VFP 0 +#if !defined(CV_STRONG_ALIGNMENT) && defined(__arm__) && !(defined(__aarch64__) || defined(_M_ARM64)) +// int*, int64* should be propertly aligned pointers on ARMv7 +#define CV_STRONG_ALIGNMENT 1 +#endif +#if !defined(CV_STRONG_ALIGNMENT) +#define CV_STRONG_ALIGNMENT 0 #endif /* fundamental constants */ #define CV_PI 3.1415926535897932384626433832795 -#define CV_2PI 6.283185307179586476925286766559 +#define CV_2PI 6.283185307179586476925286766559 #define CV_LOG2 0.69314718055994530941723212145818 +#if defined __ARM_FP16_FORMAT_IEEE \ + && !defined __CUDACC__ +# define CV_FP16_TYPE 1 +#else +# define CV_FP16_TYPE 0 +#endif + +typedef union Cv16suf +{ + short i; + ushort u; +#if CV_FP16_TYPE + __fp16 h; +#endif +} +Cv16suf; + typedef union Cv32suf { int i; @@ -319,20 +372,50 @@ typedef union Cv64suf } Cv64suf; -#define OPENCV_ABI_COMPATIBILITY 300 +#define OPENCV_ABI_COMPATIBILITY 400 #ifdef __OPENCV_BUILD -# define DISABLE_OPENCV_24_COMPATIBILITY +# define DISABLE_OPENCV_3_COMPATIBILITY +# define OPENCV_DISABLE_DEPRECATED_COMPATIBILITY #endif -#if (defined WIN32 || defined _WIN32 || defined WINCE || defined __CYGWIN__) && defined CVAPI_EXPORTS -# define CV_EXPORTS __declspec(dllexport) -#elif defined __GNUC__ && __GNUC__ >= 4 -# define CV_EXPORTS __attribute__ ((visibility ("default"))) +#ifndef CV_EXPORTS +# if (defined _WIN32 || defined WINCE || defined __CYGWIN__) && defined(CVAPI_EXPORTS) +# define CV_EXPORTS __declspec(dllexport) +# elif defined __GNUC__ && __GNUC__ >= 4 && (defined(CVAPI_EXPORTS) || defined(__APPLE__)) +# define CV_EXPORTS __attribute__ ((visibility ("default"))) +# endif +#endif + +#ifndef CV_EXPORTS +# define CV_EXPORTS +#endif + +#ifdef _MSC_VER +# define CV_EXPORTS_TEMPLATE #else -# define CV_EXPORTS +# define CV_EXPORTS_TEMPLATE CV_EXPORTS +#endif + +#ifndef CV_DEPRECATED +# if defined(__GNUC__) +# define CV_DEPRECATED __attribute__ ((deprecated)) +# elif defined(_MSC_VER) +# define CV_DEPRECATED __declspec(deprecated) +# else +# define CV_DEPRECATED +# endif +#endif + +#ifndef CV_DEPRECATED_EXTERNAL +# if defined(__OPENCV_BUILD) +# define CV_DEPRECATED_EXTERNAL /* nothing */ +# else +# define CV_DEPRECATED_EXTERNAL CV_DEPRECATED +# endif #endif + #ifndef CV_EXTERN_C # ifdef __cplusplus # define CV_EXTERN_C extern "C" @@ -352,72 +435,14 @@ Cv64suf; #define CV_PROP_RW #define CV_WRAP #define CV_WRAP_AS(synonym) +#define CV_WRAP_MAPPABLE(mappable) +#define CV_WRAP_PHANTOM(phantom_header) +#define CV_WRAP_DEFAULT(val) /****************************************************************************************\ * Matrix type (Mat) * \****************************************************************************************/ -#define CV_CN_MAX 512 -#define CV_CN_SHIFT 3 -#define CV_DEPTH_MAX (1 << CV_CN_SHIFT) - -#define CV_8U 0 -#define CV_8S 1 -#define CV_16U 2 -#define CV_16S 3 -#define CV_32S 4 -#define CV_32F 5 -#define CV_64F 6 -#define CV_USRTYPE1 7 - -#define CV_MAT_DEPTH_MASK (CV_DEPTH_MAX - 1) -#define CV_MAT_DEPTH(flags) ((flags) & CV_MAT_DEPTH_MASK) - -#define CV_MAKETYPE(depth,cn) (CV_MAT_DEPTH(depth) + (((cn)-1) << CV_CN_SHIFT)) -#define CV_MAKE_TYPE CV_MAKETYPE - -#define CV_8UC1 CV_MAKETYPE(CV_8U,1) -#define CV_8UC2 CV_MAKETYPE(CV_8U,2) -#define CV_8UC3 CV_MAKETYPE(CV_8U,3) -#define CV_8UC4 CV_MAKETYPE(CV_8U,4) -#define CV_8UC(n) CV_MAKETYPE(CV_8U,(n)) - -#define CV_8SC1 CV_MAKETYPE(CV_8S,1) -#define CV_8SC2 CV_MAKETYPE(CV_8S,2) -#define CV_8SC3 CV_MAKETYPE(CV_8S,3) -#define CV_8SC4 CV_MAKETYPE(CV_8S,4) -#define CV_8SC(n) CV_MAKETYPE(CV_8S,(n)) - -#define CV_16UC1 CV_MAKETYPE(CV_16U,1) -#define CV_16UC2 CV_MAKETYPE(CV_16U,2) -#define CV_16UC3 CV_MAKETYPE(CV_16U,3) -#define CV_16UC4 CV_MAKETYPE(CV_16U,4) -#define CV_16UC(n) CV_MAKETYPE(CV_16U,(n)) - -#define CV_16SC1 CV_MAKETYPE(CV_16S,1) -#define CV_16SC2 CV_MAKETYPE(CV_16S,2) -#define CV_16SC3 CV_MAKETYPE(CV_16S,3) -#define CV_16SC4 CV_MAKETYPE(CV_16S,4) -#define CV_16SC(n) CV_MAKETYPE(CV_16S,(n)) - -#define CV_32SC1 CV_MAKETYPE(CV_32S,1) -#define CV_32SC2 CV_MAKETYPE(CV_32S,2) -#define CV_32SC3 CV_MAKETYPE(CV_32S,3) -#define CV_32SC4 CV_MAKETYPE(CV_32S,4) -#define CV_32SC(n) CV_MAKETYPE(CV_32S,(n)) - -#define CV_32FC1 CV_MAKETYPE(CV_32F,1) -#define CV_32FC2 CV_MAKETYPE(CV_32F,2) -#define CV_32FC3 CV_MAKETYPE(CV_32F,3) -#define CV_32FC4 CV_MAKETYPE(CV_32F,4) -#define CV_32FC(n) CV_MAKETYPE(CV_32F,(n)) - -#define CV_64FC1 CV_MAKETYPE(CV_64F,1) -#define CV_64FC2 CV_MAKETYPE(CV_64F,2) -#define CV_64FC3 CV_MAKETYPE(CV_64F,3) -#define CV_64FC4 CV_MAKETYPE(CV_64F,4) -#define CV_64FC(n) CV_MAKETYPE(CV_64F,(n)) - #define CV_MAT_CN_MASK ((CV_CN_MAX - 1) << CV_CN_SHIFT) #define CV_MAT_CN(flags) ((((flags) & CV_MAT_CN_MASK) >> CV_CN_SHIFT) + 1) #define CV_MAT_TYPE_MASK (CV_DEPTH_MAX*CV_CN_MAX - 1) @@ -431,13 +456,10 @@ Cv64suf; #define CV_IS_SUBMAT(flags) ((flags) & CV_MAT_SUBMAT_FLAG) /** Size of each channel item, - 0x124489 = 1000 0100 0100 0010 0010 0001 0001 ~ array of sizeof(arr_type_elem) */ -#define CV_ELEM_SIZE1(type) \ - ((((sizeof(size_t)<<28)|0x8442211) >> CV_MAT_DEPTH(type)*4) & 15) + 0x28442211 = 0010 1000 0100 0100 0010 0010 0001 0001 ~ array of sizeof(arr_type_elem) */ +#define CV_ELEM_SIZE1(type) ((0x28442211 >> CV_MAT_DEPTH(type)*4) & 15) -/** 0x3a50 = 11 10 10 01 01 00 00 ~ array of log2(sizeof(arr_type_elem)) */ -#define CV_ELEM_SIZE(type) \ - (CV_MAT_CN(type) << ((((sizeof(size_t)/4+1)*16384|0x3a50) >> CV_MAT_DEPTH(type)*2) & 3)) +#define CV_ELEM_SIZE(type) (CV_MAT_CN(type)*CV_ELEM_SIZE1(type)) #ifndef MIN # define MIN(a,b) ((a) > (b) ? (b) : (a)) @@ -447,15 +469,179 @@ Cv64suf; # define MAX(a,b) ((a) < (b) ? (b) : (a)) #endif +///////////////////////////////////////// Enum operators /////////////////////////////////////// + +/** + +Provides compatibility operators for both classical and C++11 enum classes, +as well as exposing the C++11 enum class members for backwards compatibility + +@code + // Provides operators required for flag enums + CV_ENUM_FLAGS(AccessFlag) + + // Exposes the listed members of the enum class AccessFlag to the current namespace + CV_ENUM_CLASS_EXPOSE(AccessFlag, ACCESS_READ [, ACCESS_WRITE [, ...] ]); +@endcode +*/ + +#define __CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST) \ +static const EnumType MEMBER_CONST = EnumType::MEMBER_CONST; \ + +#define __CV_ENUM_CLASS_EXPOSE_2(EnumType, MEMBER_CONST, ...) \ +__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ +__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_1(EnumType, __VA_ARGS__)); \ + +#define __CV_ENUM_CLASS_EXPOSE_3(EnumType, MEMBER_CONST, ...) \ +__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ +__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_2(EnumType, __VA_ARGS__)); \ + +#define __CV_ENUM_CLASS_EXPOSE_4(EnumType, MEMBER_CONST, ...) \ +__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ +__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_3(EnumType, __VA_ARGS__)); \ + +#define __CV_ENUM_CLASS_EXPOSE_5(EnumType, MEMBER_CONST, ...) \ +__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ +__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_4(EnumType, __VA_ARGS__)); \ + +#define __CV_ENUM_CLASS_EXPOSE_6(EnumType, MEMBER_CONST, ...) \ +__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ +__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_5(EnumType, __VA_ARGS__)); \ + +#define __CV_ENUM_CLASS_EXPOSE_7(EnumType, MEMBER_CONST, ...) \ +__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ +__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_6(EnumType, __VA_ARGS__)); \ + +#define __CV_ENUM_CLASS_EXPOSE_8(EnumType, MEMBER_CONST, ...) \ +__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ +__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_7(EnumType, __VA_ARGS__)); \ + +#define __CV_ENUM_CLASS_EXPOSE_9(EnumType, MEMBER_CONST, ...) \ +__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ +__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_8(EnumType, __VA_ARGS__)); \ + +#define __CV_ENUM_FLAGS_LOGICAL_NOT(EnumType) \ +static inline bool operator!(const EnumType& val) \ +{ \ + typedef std::underlying_type::type UnderlyingType; \ + return !static_cast(val); \ +} \ + +#define __CV_ENUM_FLAGS_LOGICAL_NOT_EQ(Arg1Type, Arg2Type) \ +static inline bool operator!=(const Arg1Type& a, const Arg2Type& b) \ +{ \ + return static_cast(a) != static_cast(b); \ +} \ + +#define __CV_ENUM_FLAGS_LOGICAL_EQ(Arg1Type, Arg2Type) \ +static inline bool operator==(const Arg1Type& a, const Arg2Type& b) \ +{ \ + return static_cast(a) == static_cast(b); \ +} \ + +#define __CV_ENUM_FLAGS_BITWISE_NOT(EnumType) \ +static inline EnumType operator~(const EnumType& val) \ +{ \ + typedef std::underlying_type::type UnderlyingType; \ + return static_cast(~static_cast(val)); \ +} \ + +#define __CV_ENUM_FLAGS_BITWISE_OR(EnumType, Arg1Type, Arg2Type) \ +static inline EnumType operator|(const Arg1Type& a, const Arg2Type& b) \ +{ \ + typedef std::underlying_type::type UnderlyingType; \ + return static_cast(static_cast(a) | static_cast(b)); \ +} \ + +#define __CV_ENUM_FLAGS_BITWISE_AND(EnumType, Arg1Type, Arg2Type) \ +static inline EnumType operator&(const Arg1Type& a, const Arg2Type& b) \ +{ \ + typedef std::underlying_type::type UnderlyingType; \ + return static_cast(static_cast(a) & static_cast(b)); \ +} \ + +#define __CV_ENUM_FLAGS_BITWISE_XOR(EnumType, Arg1Type, Arg2Type) \ +static inline EnumType operator^(const Arg1Type& a, const Arg2Type& b) \ +{ \ + typedef std::underlying_type::type UnderlyingType; \ + return static_cast(static_cast(a) ^ static_cast(b)); \ +} \ + +#define __CV_ENUM_FLAGS_BITWISE_OR_EQ(EnumType, Arg1Type) \ +static inline EnumType& operator|=(EnumType& _this, const Arg1Type& val) \ +{ \ + _this = static_cast(static_cast(_this) | static_cast(val)); \ + return _this; \ +} \ + +#define __CV_ENUM_FLAGS_BITWISE_AND_EQ(EnumType, Arg1Type) \ +static inline EnumType& operator&=(EnumType& _this, const Arg1Type& val) \ +{ \ + _this = static_cast(static_cast(_this) & static_cast(val)); \ + return _this; \ +} \ + +#define __CV_ENUM_FLAGS_BITWISE_XOR_EQ(EnumType, Arg1Type) \ +static inline EnumType& operator^=(EnumType& _this, const Arg1Type& val) \ +{ \ + _this = static_cast(static_cast(_this) ^ static_cast(val)); \ + return _this; \ +} \ + +#define CV_ENUM_CLASS_EXPOSE(EnumType, ...) \ +__CV_EXPAND(__CV_CAT(__CV_ENUM_CLASS_EXPOSE_, __CV_VA_NUM_ARGS(__VA_ARGS__))(EnumType, __VA_ARGS__)); \ + +#define CV_ENUM_FLAGS(EnumType) \ +__CV_ENUM_FLAGS_LOGICAL_NOT (EnumType) \ +__CV_ENUM_FLAGS_LOGICAL_EQ (EnumType, int) \ +__CV_ENUM_FLAGS_LOGICAL_NOT_EQ (EnumType, int) \ + \ +__CV_ENUM_FLAGS_BITWISE_NOT (EnumType) \ +__CV_ENUM_FLAGS_BITWISE_OR (EnumType, EnumType, EnumType) \ +__CV_ENUM_FLAGS_BITWISE_AND (EnumType, EnumType, EnumType) \ +__CV_ENUM_FLAGS_BITWISE_XOR (EnumType, EnumType, EnumType) \ + \ +__CV_ENUM_FLAGS_BITWISE_OR_EQ (EnumType, EnumType) \ +__CV_ENUM_FLAGS_BITWISE_AND_EQ (EnumType, EnumType) \ +__CV_ENUM_FLAGS_BITWISE_XOR_EQ (EnumType, EnumType) \ + +/****************************************************************************************\ +* static analysys * +\****************************************************************************************/ + +// In practice, some macro are not processed correctly (noreturn is not detected). +// We need to use simplified definition for them. +#ifndef CV_STATIC_ANALYSIS +# if defined(__KLOCWORK__) || defined(__clang_analyzer__) || defined(__COVERITY__) +# define CV_STATIC_ANALYSIS 1 +# endif +#else +# if defined(CV_STATIC_ANALYSIS) && !(__CV_CAT(1, CV_STATIC_ANALYSIS) == 1) // defined and not empty +# if 0 == CV_STATIC_ANALYSIS +# undef CV_STATIC_ANALYSIS +# endif +# endif +#endif + +/****************************************************************************************\ +* Thread sanitizer * +\****************************************************************************************/ +#ifndef CV_THREAD_SANITIZER +# if defined(__has_feature) +# if __has_feature(thread_sanitizer) +# define CV_THREAD_SANITIZER +# endif +# endif +#endif + /****************************************************************************************\ * exchange-add operation for atomic operations on reference counters * \****************************************************************************************/ -#if defined __INTEL_COMPILER && !(defined WIN32 || defined _WIN32) - // atomic increment on the linux version of the Intel(tm) compiler -# define CV_XADD(addr, delta) (int)_InterlockedExchangeAdd(const_cast(reinterpret_cast(addr)), delta) -#elif defined __GNUC__ -# if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__) +#ifdef CV_XADD + // allow to use user-defined macro +#elif defined __GNUC__ || defined __clang__ +# if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__) && !defined __INTEL_COMPILER # ifdef __ATOMIC_ACQ_REL # define CV_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL) # else @@ -473,7 +659,11 @@ Cv64suf; # include # define CV_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta) #else - CV_INLINE CV_XADD(int* addr, int delta) { int tmp = *addr; *addr += delta; return tmp; } + #ifdef OPENCV_FORCE_UNSAFE_XADD + CV_INLINE CV_XADD(int* addr, int delta) { int tmp = *addr; *addr += delta; return tmp; } + #else + #error "OpenCV: can't define safe CV_XADD macro for current platform (unsupported). Define CV_XADD macro through custom port header (see OPENCV_INCLUDE_PORT_FILE)" + #endif #endif @@ -493,23 +683,226 @@ Cv64suf; /****************************************************************************************\ -* C++ Move semantics * +* CV_NODISCARD attribute * +* encourages the compiler to issue a warning if the return value is discarded (C++17) * \****************************************************************************************/ - -#ifndef CV_CXX_MOVE_SEMANTICS -# if __cplusplus >= 201103L || defined(__GXX_EXPERIMENTAL_CXX0X__) || defined(_MSC_VER) && _MSC_VER >= 1600 -# define CV_CXX_MOVE_SEMANTICS 1 -# elif defined(__clang) -# if __has_feature(cxx_rvalue_references) -# define CV_CXX_MOVE_SEMANTICS 1 +#ifndef CV_NODISCARD +# if defined(__GNUC__) +# define CV_NODISCARD __attribute__((__warn_unused_result__)) // at least available with GCC 3.4 +# elif defined(__clang__) && defined(__has_attribute) +# if __has_attribute(__warn_unused_result__) +# define CV_NODISCARD __attribute__((__warn_unused_result__)) # endif # endif +#endif +#ifndef CV_NODISCARD +# define CV_NODISCARD /* nothing by default */ +#endif + + +/****************************************************************************************\ +* C++ 11 * +\****************************************************************************************/ +#ifndef CV_CXX11 +# if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1800) +# define CV_CXX11 1 +# endif #else -# if CV_CXX_MOVE_SEMANTICS == 0 -# undef CV_CXX_MOVE_SEMANTICS +# if CV_CXX11 == 0 +# undef CV_CXX11 # endif #endif +#ifndef CV_CXX11 +# error "OpenCV 4.x+ requires enabled C++11 support" +#endif + +#define CV_CXX_MOVE_SEMANTICS 1 +#define CV_CXX_MOVE(x) std::move(x) +#define CV_CXX_STD_ARRAY 1 +#include +#ifndef CV_OVERRIDE +# define CV_OVERRIDE override +#endif +#ifndef CV_FINAL +# define CV_FINAL final +#endif + +#ifndef CV_NOEXCEPT +# if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/) +# define CV_NOEXCEPT noexcept +# endif +#endif +#ifndef CV_NOEXCEPT +# define CV_NOEXCEPT +#endif + +#ifndef CV_CONSTEXPR +# if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/) +# define CV_CONSTEXPR constexpr +# endif +#endif +#ifndef CV_CONSTEXPR +# define CV_CONSTEXPR +#endif + +// Integer types portatibility +#ifdef OPENCV_STDINT_HEADER +#include OPENCV_STDINT_HEADER +#elif defined(__cplusplus) +#if defined(_MSC_VER) && _MSC_VER < 1600 /* MSVS 2010 */ +namespace cv { +typedef signed char int8_t; +typedef unsigned char uint8_t; +typedef signed short int16_t; +typedef unsigned short uint16_t; +typedef signed int int32_t; +typedef unsigned int uint32_t; +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; +} +#elif defined(_MSC_VER) || __cplusplus >= 201103L +#include +namespace cv { +using std::int8_t; +using std::uint8_t; +using std::int16_t; +using std::uint16_t; +using std::int32_t; +using std::uint32_t; +using std::int64_t; +using std::uint64_t; +} +#else +#include +namespace cv { +typedef ::int8_t int8_t; +typedef ::uint8_t uint8_t; +typedef ::int16_t int16_t; +typedef ::uint16_t uint16_t; +typedef ::int32_t int32_t; +typedef ::uint32_t uint32_t; +typedef ::int64_t int64_t; +typedef ::uint64_t uint64_t; +} +#endif +#else // pure C +#include +#endif + +#ifdef __cplusplus +namespace cv +{ + +class float16_t +{ +public: +#if CV_FP16_TYPE + + float16_t() : h(0) {} + explicit float16_t(float x) { h = (__fp16)x; } + operator float() const { return (float)h; } + static float16_t fromBits(ushort w) + { + Cv16suf u; + u.u = w; + float16_t result; + result.h = u.h; + return result; + } + static float16_t zero() + { + float16_t result; + result.h = (__fp16)0; + return result; + } + ushort bits() const + { + Cv16suf u; + u.h = h; + return u.u; + } +protected: + __fp16 h; + +#else + float16_t() : w(0) {} + explicit float16_t(float x) + { + #if CV_AVX2 + __m128 v = _mm_load_ss(&x); + w = (ushort)_mm_cvtsi128_si32(_mm_cvtps_ph(v, 0)); + #else + Cv32suf in; + in.f = x; + unsigned sign = in.u & 0x80000000; + in.u ^= sign; + + if( in.u >= 0x47800000 ) + w = (ushort)(in.u > 0x7f800000 ? 0x7e00 : 0x7c00); + else + { + if (in.u < 0x38800000) + { + in.f += 0.5f; + w = (ushort)(in.u - 0x3f000000); + } + else + { + unsigned t = in.u + 0xc8000fff; + w = (ushort)((t + ((in.u >> 13) & 1)) >> 13); + } + } + + w = (ushort)(w | (sign >> 16)); + #endif + } + + operator float() const + { + #if CV_AVX2 + float f; + _mm_store_ss(&f, _mm_cvtph_ps(_mm_cvtsi32_si128(w))); + return f; + #else + Cv32suf out; + + unsigned t = ((w & 0x7fff) << 13) + 0x38000000; + unsigned sign = (w & 0x8000) << 16; + unsigned e = w & 0x7c00; + + out.u = t + (1 << 23); + out.u = (e >= 0x7c00 ? t + 0x38000000 : + e == 0 ? (static_cast(out.f -= 6.103515625e-05f), out.u) : t) | sign; + return out.f; + #endif + } + + static float16_t fromBits(ushort b) + { + float16_t result; + result.w = b; + return result; + } + static float16_t zero() + { + float16_t result; + result.w = (ushort)0; + return result; + } + ushort bits() const { return w; } +protected: + ushort w; + +#endif +}; + +} +#endif //! @} -#endif // __OPENCV_CORE_CVDEF_H__ +#ifndef __cplusplus +#include "opencv2/core/fast_math.hpp" // define cvRound(double) +#endif + +#endif // OPENCV_CORE_CVDEF_H diff --git a/IPL/include/opencv/opencv2/core/cvstd.hpp b/IPL/include/opencv/opencv2/core/cvstd.hpp index edae954..6ce9e4b 100644 --- a/IPL/include/opencv/opencv2/core/cvstd.hpp +++ b/IPL/include/opencv/opencv2/core/cvstd.hpp @@ -41,25 +41,21 @@ // //M*/ -#ifndef __OPENCV_CORE_CVSTD_HPP__ -#define __OPENCV_CORE_CVSTD_HPP__ +#ifndef OPENCV_CORE_CVSTD_HPP +#define OPENCV_CORE_CVSTD_HPP #ifndef __cplusplus # error cvstd.hpp header must be compiled as C++ #endif #include "opencv2/core/cvdef.h" - #include #include #include -#ifndef OPENCV_NOSTL -# include -#endif +#include // import useful primitives from stl -#ifndef OPENCV_NOSTL_TRANSITIONAL # include # include # include //for abs(int) @@ -67,6 +63,11 @@ namespace cv { + static inline uchar abs(uchar a) { return a; } + static inline ushort abs(ushort a) { return a; } + static inline unsigned abs(unsigned a) { return a; } + static inline uint64 abs(uint64 a) { return a; } + using std::min; using std::max; using std::abs; @@ -77,28 +78,7 @@ namespace cv using std::log; } -namespace std -{ - static inline uchar abs(uchar a) { return a; } - static inline ushort abs(ushort a) { return a; } - static inline unsigned abs(unsigned a) { return a; } - static inline uint64 abs(uint64 a) { return a; } -} - -#else -namespace cv -{ - template static inline T min(T a, T b) { return a < b ? a : b; } - template static inline T max(T a, T b) { return a > b ? a : b; } - template static inline T abs(T a) { return a < 0 ? -a : a; } - template static inline void swap(T& a, T& b) { T tmp = a; a = b; b = tmp; } - - template<> inline uchar abs(uchar a) { return a; } - template<> inline ushort abs(ushort a) { return a; } - template<> inline unsigned abs(unsigned a) { return a; } - template<> inline uint64 abs(uint64 a) { return a; } -} -#endif +#include "cvstd_wrapper.hpp" namespace cv { @@ -125,7 +105,7 @@ double memory deallocation. CV_EXPORTS void fastFree(void* ptr); /*! - The STL-compilant memory Allocator based on cv::fastMalloc() and cv::fastFree() + The STL-compliant memory Allocator based on cv::fastMalloc() and cv::fastFree() */ template class Allocator { @@ -160,910 +140,51 @@ template class Allocator //! @} core_utils -//! @cond IGNORED - -namespace detail -{ - -// Metafunction to avoid taking a reference to void. -template -struct RefOrVoid { typedef T& type; }; - -template<> -struct RefOrVoid{ typedef void type; }; - -template<> -struct RefOrVoid{ typedef const void type; }; - -template<> -struct RefOrVoid{ typedef volatile void type; }; - -template<> -struct RefOrVoid{ typedef const volatile void type; }; - -// This class would be private to Ptr, if it didn't have to be a non-template. -struct PtrOwner; - -} - -template -struct DefaultDeleter -{ - void operator () (Y* p) const; -}; - //! @endcond //! @addtogroup core_basic //! @{ -/** @brief Template class for smart pointers with shared ownership - -A Ptr\ pretends to be a pointer to an object of type T. Unlike an ordinary pointer, however, the -object will be automatically cleaned up once all Ptr instances pointing to it are destroyed. - -Ptr is similar to boost::shared_ptr that is part of the Boost library -() and std::shared_ptr from -the [C++11](http://en.wikipedia.org/wiki/C++11) standard. - -This class provides the following advantages: -- Default constructor, copy constructor, and assignment operator for an arbitrary C++ class or C - structure. For some objects, like files, windows, mutexes, sockets, and others, a copy - constructor or an assignment operator are difficult to define. For some other objects, like - complex classifiers in OpenCV, copy constructors are absent and not easy to implement. Finally, - some of complex OpenCV and your own data structures may be written in C. However, copy - constructors and default constructors can simplify programming a lot. Besides, they are often - required (for example, by STL containers). By using a Ptr to such an object instead of the - object itself, you automatically get all of the necessary constructors and the assignment - operator. -- *O(1)* complexity of the above-mentioned operations. While some structures, like std::vector, - provide a copy constructor and an assignment operator, the operations may take a considerable - amount of time if the data structures are large. But if the structures are put into a Ptr, the - overhead is small and independent of the data size. -- Automatic and customizable cleanup, even for C structures. See the example below with FILE\*. -- Heterogeneous collections of objects. The standard STL and most other C++ and OpenCV containers - can store only objects of the same type and the same size. The classical solution to store - objects of different types in the same container is to store pointers to the base class (Base\*) - instead but then you lose the automatic memory management. Again, by using Ptr\ instead - of raw pointers, you can solve the problem. - -A Ptr is said to *own* a pointer - that is, for each Ptr there is a pointer that will be deleted -once all Ptr instances that own it are destroyed. The owned pointer may be null, in which case -nothing is deleted. Each Ptr also *stores* a pointer. The stored pointer is the pointer the Ptr -pretends to be; that is, the one you get when you use Ptr::get or the conversion to T\*. It's -usually the same as the owned pointer, but if you use casts or the general shared-ownership -constructor, the two may diverge: the Ptr will still own the original pointer, but will itself point -to something else. - -The owned pointer is treated as a black box. The only thing Ptr needs to know about it is how to -delete it. This knowledge is encapsulated in the *deleter* - an auxiliary object that is associated -with the owned pointer and shared between all Ptr instances that own it. The default deleter is an -instance of DefaultDeleter, which uses the standard C++ delete operator; as such it will work with -any pointer allocated with the standard new operator. - -However, if the pointer must be deleted in a different way, you must specify a custom deleter upon -Ptr construction. A deleter is simply a callable object that accepts the pointer as its sole -argument. For example, if you want to wrap FILE, you may do so as follows: -@code - Ptr f(fopen("myfile.txt", "w"), fclose); - if(!f) throw ...; - fprintf(f, ....); - ... - // the file will be closed automatically by f's destructor. -@endcode -Alternatively, if you want all pointers of a particular type to be deleted the same way, you can -specialize DefaultDeleter::operator() for that type, like this: -@code - namespace cv { - template<> void DefaultDeleter::operator ()(FILE * obj) const - { - fclose(obj); - } - } -@endcode -For convenience, the following types from the OpenCV C API already have such a specialization that -calls the appropriate release function: -- CvCapture -- CvFileStorage -- CvHaarClassifierCascade -- CvMat -- CvMatND -- CvMemStorage -- CvSparseMat -- CvVideoWriter -- IplImage -@note The shared ownership mechanism is implemented with reference counting. As such, cyclic -ownership (e.g. when object a contains a Ptr to object b, which contains a Ptr to object a) will -lead to all involved objects never being cleaned up. Avoid such situations. -@note It is safe to concurrently read (but not write) a Ptr instance from multiple threads and -therefore it is normally safe to use it in multi-threaded applications. The same is true for Mat and -other C++ OpenCV classes that use internal reference counts. -*/ -template -struct Ptr -{ - /** Generic programming support. */ - typedef T element_type; - - /** The default constructor creates a null Ptr - one that owns and stores a null pointer. - */ - Ptr(); - - /** - If p is null, these are equivalent to the default constructor. - Otherwise, these constructors assume ownership of p - that is, the created Ptr owns and stores p - and assumes it is the sole owner of it. Don't use them if p is already owned by another Ptr, or - else p will get deleted twice. - With the first constructor, DefaultDeleter\() becomes the associated deleter (so p will - eventually be deleted with the standard delete operator). Y must be a complete type at the point - of invocation. - With the second constructor, d becomes the associated deleter. - Y\* must be convertible to T\*. - @param p Pointer to own. - @note It is often easier to use makePtr instead. - */ - template -#ifdef DISABLE_OPENCV_24_COMPATIBILITY - explicit -#endif - Ptr(Y* p); - - /** @overload - @param d Deleter to use for the owned pointer. - @param p Pointer to own. - */ - template - Ptr(Y* p, D d); - - /** - These constructors create a Ptr that shares ownership with another Ptr - that is, own the same - pointer as o. - With the first two, the same pointer is stored, as well; for the second, Y\* must be convertible - to T\*. - With the third, p is stored, and Y may be any type. This constructor allows to have completely - unrelated owned and stored pointers, and should be used with care to avoid confusion. A relatively - benign use is to create a non-owning Ptr, like this: - @code - ptr = Ptr(Ptr(), dont_delete_me); // owns nothing; will not delete the pointer. - @endcode - @param o Ptr to share ownership with. - */ - Ptr(const Ptr& o); - - /** @overload - @param o Ptr to share ownership with. - */ - template - Ptr(const Ptr& o); - - /** @overload - @param o Ptr to share ownership with. - @param p Pointer to store. - */ - template - Ptr(const Ptr& o, T* p); - - /** The destructor is equivalent to calling Ptr::release. */ - ~Ptr(); - - /** - Assignment replaces the current Ptr instance with one that owns and stores same pointers as o and - then destroys the old instance. - @param o Ptr to share ownership with. - */ - Ptr& operator = (const Ptr& o); - - /** @overload */ - template - Ptr& operator = (const Ptr& o); - - /** If no other Ptr instance owns the owned pointer, deletes it with the associated deleter. Then sets - both the owned and the stored pointers to NULL. - */ - void release(); - - /** - `ptr.reset(...)` is equivalent to `ptr = Ptr(...)`. - @param p Pointer to own. - */ - template - void reset(Y* p); - - /** @overload - @param d Deleter to use for the owned pointer. - @param p Pointer to own. - */ - template - void reset(Y* p, D d); - - /** - Swaps the owned and stored pointers (and deleters, if any) of this and o. - @param o Ptr to swap with. - */ - void swap(Ptr& o); - - /** Returns the stored pointer. */ - T* get() const; - - /** Ordinary pointer emulation. */ - typename detail::RefOrVoid::type operator * () const; - - /** Ordinary pointer emulation. */ - T* operator -> () const; - - /** Equivalent to get(). */ - operator T* () const; - - /** ptr.empty() is equivalent to `!ptr.get()`. */ - bool empty() const; - - /** Returns a Ptr that owns the same pointer as this, and stores the same - pointer as this, except converted via static_cast to Y*. - */ - template - Ptr staticCast() const; - - /** Ditto for const_cast. */ - template - Ptr constCast() const; - - /** Ditto for dynamic_cast. */ - template - Ptr dynamicCast() const; - -#ifdef CV_CXX_MOVE_SEMANTICS - Ptr(Ptr&& o); - Ptr& operator = (Ptr&& o); -#endif - -private: - detail::PtrOwner* owner; - T* stored; - - template - friend struct Ptr; // have to do this for the cross-type copy constructor -}; - -/** Equivalent to ptr1.swap(ptr2). Provided to help write generic algorithms. */ -template -void swap(Ptr& ptr1, Ptr& ptr2); - -/** Return whether ptr1.get() and ptr2.get() are equal and not equal, respectively. */ -template -bool operator == (const Ptr& ptr1, const Ptr& ptr2); -template -bool operator != (const Ptr& ptr1, const Ptr& ptr2); - -/** `makePtr(...)` is equivalent to `Ptr(new T(...))`. It is shorter than the latter, and it's -marginally safer than using a constructor or Ptr::reset, since it ensures that the owned pointer -is new and thus not owned by any other Ptr instance. -Unfortunately, perfect forwarding is impossible to implement in C++03, and so makePtr is limited -to constructors of T that have up to 10 arguments, none of which are non-const references. - */ -template -Ptr makePtr(); -/** @overload */ -template -Ptr makePtr(const A1& a1); -/** @overload */ -template -Ptr makePtr(const A1& a1, const A2& a2); -/** @overload */ -template -Ptr makePtr(const A1& a1, const A2& a2, const A3& a3); -/** @overload */ -template -Ptr makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4); -/** @overload */ -template -Ptr makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5); -/** @overload */ -template -Ptr makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6); -/** @overload */ -template -Ptr makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7); -/** @overload */ -template -Ptr makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8); -/** @overload */ -template -Ptr makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9); -/** @overload */ -template -Ptr makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const A10& a10); - //////////////////////////////// string class //////////////////////////////// class CV_EXPORTS FileNode; //for string constructor from FileNode -class CV_EXPORTS String -{ -public: - typedef char value_type; - typedef char& reference; - typedef const char& const_reference; - typedef char* pointer; - typedef const char* const_pointer; - typedef ptrdiff_t difference_type; - typedef size_t size_type; - typedef char* iterator; - typedef const char* const_iterator; - - static const size_t npos = size_t(-1); - - explicit String(); - String(const String& str); - String(const String& str, size_t pos, size_t len = npos); - String(const char* s); - String(const char* s, size_t n); - String(size_t n, char c); - String(const char* first, const char* last); - template String(Iterator first, Iterator last); - explicit String(const FileNode& fn); - ~String(); - - String& operator=(const String& str); - String& operator=(const char* s); - String& operator=(char c); - - String& operator+=(const String& str); - String& operator+=(const char* s); - String& operator+=(char c); - - size_t size() const; - size_t length() const; - - char operator[](size_t idx) const; - char operator[](int idx) const; - - const char* begin() const; - const char* end() const; - - const char* c_str() const; - - bool empty() const; - void clear(); - - int compare(const char* s) const; - int compare(const String& str) const; - - void swap(String& str); - String substr(size_t pos = 0, size_t len = npos) const; - - size_t find(const char* s, size_t pos, size_t n) const; - size_t find(char c, size_t pos = 0) const; - size_t find(const String& str, size_t pos = 0) const; - size_t find(const char* s, size_t pos = 0) const; - - size_t rfind(const char* s, size_t pos, size_t n) const; - size_t rfind(char c, size_t pos = npos) const; - size_t rfind(const String& str, size_t pos = npos) const; - size_t rfind(const char* s, size_t pos = npos) const; - - size_t find_first_of(const char* s, size_t pos, size_t n) const; - size_t find_first_of(char c, size_t pos = 0) const; - size_t find_first_of(const String& str, size_t pos = 0) const; - size_t find_first_of(const char* s, size_t pos = 0) const; - - size_t find_last_of(const char* s, size_t pos, size_t n) const; - size_t find_last_of(char c, size_t pos = npos) const; - size_t find_last_of(const String& str, size_t pos = npos) const; - size_t find_last_of(const char* s, size_t pos = npos) const; - - friend String operator+ (const String& lhs, const String& rhs); - friend String operator+ (const String& lhs, const char* rhs); - friend String operator+ (const char* lhs, const String& rhs); - friend String operator+ (const String& lhs, char rhs); - friend String operator+ (char lhs, const String& rhs); - - String toLowerCase() const; - -#ifndef OPENCV_NOSTL - String(const std::string& str); - String(const std::string& str, size_t pos, size_t len = npos); - String& operator=(const std::string& str); - String& operator+=(const std::string& str); - operator std::string() const; - - friend String operator+ (const String& lhs, const std::string& rhs); - friend String operator+ (const std::string& lhs, const String& rhs); -#endif +typedef std::string String; -private: - char* cstr_; - size_t len_; - - char* allocate(size_t len); // len without trailing 0 - void deallocate(); - - String(int); // disabled and invalid. Catch invalid usages like, commandLineParser.has(0) problem -}; - -//! @} core_basic - -////////////////////////// cv::String implementation ///////////////////////// +#ifndef OPENCV_DISABLE_STRING_LOWER_UPPER_CONVERSIONS //! @cond IGNORED - -inline -String::String() - : cstr_(0), len_(0) -{} - -inline -String::String(const String& str) - : cstr_(str.cstr_), len_(str.len_) -{ - if (cstr_) - CV_XADD(((int*)cstr_)-1, 1); -} - -inline -String::String(const String& str, size_t pos, size_t len) - : cstr_(0), len_(0) -{ - pos = min(pos, str.len_); - len = min(str.len_ - pos, len); - if (!len) return; - if (len == str.len_) - { - CV_XADD(((int*)str.cstr_)-1, 1); - cstr_ = str.cstr_; - len_ = str.len_; - return; - } - memcpy(allocate(len), str.cstr_ + pos, len); -} - -inline -String::String(const char* s) - : cstr_(0), len_(0) -{ - if (!s) return; - size_t len = strlen(s); - memcpy(allocate(len), s, len); -} - -inline -String::String(const char* s, size_t n) - : cstr_(0), len_(0) -{ - if (!n) return; - memcpy(allocate(n), s, n); -} - -inline -String::String(size_t n, char c) - : cstr_(0), len_(0) -{ - memset(allocate(n), c, n); -} - -inline -String::String(const char* first, const char* last) - : cstr_(0), len_(0) -{ - size_t len = (size_t)(last - first); - memcpy(allocate(len), first, len); -} - -template inline -String::String(Iterator first, Iterator last) - : cstr_(0), len_(0) -{ - size_t len = (size_t)(last - first); - char* str = allocate(len); - while (first != last) - { - *str++ = *first; - ++first; - } -} - -inline -String::~String() +namespace details { +// std::tolower is int->int +static inline char char_tolower(char ch) { - deallocate(); + return (char)std::tolower((int)ch); } - -inline -String& String::operator=(const String& str) +// std::toupper is int->int +static inline char char_toupper(char ch) { - if (&str == this) return *this; - - deallocate(); - if (str.cstr_) CV_XADD(((int*)str.cstr_)-1, 1); - cstr_ = str.cstr_; - len_ = str.len_; - return *this; + return (char)std::toupper((int)ch); } - -inline -String& String::operator=(const char* s) -{ - deallocate(); - if (!s) return *this; - size_t len = strlen(s); - memcpy(allocate(len), s, len); - return *this; -} - -inline -String& String::operator=(char c) -{ - deallocate(); - allocate(1)[0] = c; - return *this; -} - -inline -String& String::operator+=(const String& str) -{ - *this = *this + str; - return *this; -} - -inline -String& String::operator+=(const char* s) -{ - *this = *this + s; - return *this; -} - -inline -String& String::operator+=(char c) -{ - *this = *this + c; - return *this; -} - -inline -size_t String::size() const -{ - return len_; -} - -inline -size_t String::length() const -{ - return len_; -} - -inline -char String::operator[](size_t idx) const -{ - return cstr_[idx]; -} - -inline -char String::operator[](int idx) const -{ - return cstr_[idx]; -} - -inline -const char* String::begin() const -{ - return cstr_; -} - -inline -const char* String::end() const -{ - return len_ ? cstr_ + 1 : 0; -} - -inline -bool String::empty() const -{ - return len_ == 0; -} - -inline -const char* String::c_str() const -{ - return cstr_ ? cstr_ : ""; -} - -inline -void String::swap(String& str) -{ - cv::swap(cstr_, str.cstr_); - cv::swap(len_, str.len_); -} - -inline -void String::clear() -{ - deallocate(); -} - -inline -int String::compare(const char* s) const -{ - if (cstr_ == s) return 0; - return strcmp(c_str(), s); -} - -inline -int String::compare(const String& str) const -{ - if (cstr_ == str.cstr_) return 0; - return strcmp(c_str(), str.c_str()); -} - -inline -String String::substr(size_t pos, size_t len) const -{ - return String(*this, pos, len); -} - -inline -size_t String::find(const char* s, size_t pos, size_t n) const -{ - if (n == 0 || pos + n > len_) return npos; - const char* lmax = cstr_ + len_ - n; - for (const char* i = cstr_ + pos; i <= lmax; ++i) - { - size_t j = 0; - while (j < n && s[j] == i[j]) ++j; - if (j == n) return (size_t)(i - cstr_); - } - return npos; -} - -inline -size_t String::find(char c, size_t pos) const -{ - return find(&c, pos, 1); -} - -inline -size_t String::find(const String& str, size_t pos) const -{ - return find(str.c_str(), pos, str.len_); -} - -inline -size_t String::find(const char* s, size_t pos) const -{ - if (pos >= len_ || !s[0]) return npos; - const char* lmax = cstr_ + len_; - for (const char* i = cstr_ + pos; i < lmax; ++i) - { - size_t j = 0; - while (s[j] && s[j] == i[j]) - { if(i + j >= lmax) return npos; - ++j; - } - if (!s[j]) return (size_t)(i - cstr_); - } - return npos; -} - -inline -size_t String::rfind(const char* s, size_t pos, size_t n) const -{ - if (n > len_) return npos; - if (pos > len_ - n) pos = len_ - n; - for (const char* i = cstr_ + pos; i >= cstr_; --i) - { - size_t j = 0; - while (j < n && s[j] == i[j]) ++j; - if (j == n) return (size_t)(i - cstr_); - } - return npos; -} - -inline -size_t String::rfind(char c, size_t pos) const -{ - return rfind(&c, pos, 1); -} - -inline -size_t String::rfind(const String& str, size_t pos) const -{ - return rfind(str.c_str(), pos, str.len_); -} - -inline -size_t String::rfind(const char* s, size_t pos) const -{ - return rfind(s, pos, strlen(s)); -} - -inline -size_t String::find_first_of(const char* s, size_t pos, size_t n) const -{ - if (n == 0 || pos + n > len_) return npos; - const char* lmax = cstr_ + len_; - for (const char* i = cstr_ + pos; i < lmax; ++i) - { - for (size_t j = 0; j < n; ++j) - if (s[j] == *i) - return (size_t)(i - cstr_); - } - return npos; -} - -inline -size_t String::find_first_of(char c, size_t pos) const -{ - return find_first_of(&c, pos, 1); -} - -inline -size_t String::find_first_of(const String& str, size_t pos) const -{ - return find_first_of(str.c_str(), pos, str.len_); -} - -inline -size_t String::find_first_of(const char* s, size_t pos) const -{ - if (len_ == 0) return npos; - if (pos >= len_ || !s[0]) return npos; - const char* lmax = cstr_ + len_; - for (const char* i = cstr_ + pos; i < lmax; ++i) - { - for (size_t j = 0; s[j]; ++j) - if (s[j] == *i) - return (size_t)(i - cstr_); - } - return npos; -} - -inline -size_t String::find_last_of(const char* s, size_t pos, size_t n) const -{ - if (len_ == 0) return npos; - if (pos >= len_) pos = len_ - 1; - for (const char* i = cstr_ + pos; i >= cstr_; --i) - { - for (size_t j = 0; j < n; ++j) - if (s[j] == *i) - return (size_t)(i - cstr_); - } - return npos; -} - -inline -size_t String::find_last_of(char c, size_t pos) const -{ - return find_last_of(&c, pos, 1); -} - -inline -size_t String::find_last_of(const String& str, size_t pos) const -{ - return find_last_of(str.c_str(), pos, str.len_); -} - -inline -size_t String::find_last_of(const char* s, size_t pos) const -{ - if (len_ == 0) return npos; - if (pos >= len_) pos = len_ - 1; - for (const char* i = cstr_ + pos; i >= cstr_; --i) - { - for (size_t j = 0; s[j]; ++j) - if (s[j] == *i) - return (size_t)(i - cstr_); - } - return npos; -} - -inline -String String::toLowerCase() const -{ - String res(cstr_, len_); - - for (size_t i = 0; i < len_; ++i) - res.cstr_[i] = (char) ::tolower(cstr_[i]); - - return res; -} - +} // namespace details //! @endcond -// ************************* cv::String non-member functions ************************* - -//! @relates cv::String -//! @{ - -inline -String operator + (const String& lhs, const String& rhs) -{ - String s; - s.allocate(lhs.len_ + rhs.len_); - memcpy(s.cstr_, lhs.cstr_, lhs.len_); - memcpy(s.cstr_ + lhs.len_, rhs.cstr_, rhs.len_); - return s; -} - -inline -String operator + (const String& lhs, const char* rhs) +static inline std::string toLowerCase(const std::string& str) { - String s; - size_t rhslen = strlen(rhs); - s.allocate(lhs.len_ + rhslen); - memcpy(s.cstr_, lhs.cstr_, lhs.len_); - memcpy(s.cstr_ + lhs.len_, rhs, rhslen); - return s; + std::string result(str); + std::transform(result.begin(), result.end(), result.begin(), details::char_tolower); + return result; } -inline -String operator + (const char* lhs, const String& rhs) +static inline std::string toUpperCase(const std::string& str) { - String s; - size_t lhslen = strlen(lhs); - s.allocate(lhslen + rhs.len_); - memcpy(s.cstr_, lhs, lhslen); - memcpy(s.cstr_ + lhslen, rhs.cstr_, rhs.len_); - return s; + std::string result(str); + std::transform(result.begin(), result.end(), result.begin(), details::char_toupper); + return result; } -inline -String operator + (const String& lhs, char rhs) -{ - String s; - s.allocate(lhs.len_ + 1); - memcpy(s.cstr_, lhs.cstr_, lhs.len_); - s.cstr_[lhs.len_] = rhs; - return s; -} - -inline -String operator + (char lhs, const String& rhs) -{ - String s; - s.allocate(rhs.len_ + 1); - s.cstr_[0] = lhs; - memcpy(s.cstr_ + 1, rhs.cstr_, rhs.len_); - return s; -} - -static inline bool operator== (const String& lhs, const String& rhs) { return 0 == lhs.compare(rhs); } -static inline bool operator== (const char* lhs, const String& rhs) { return 0 == rhs.compare(lhs); } -static inline bool operator== (const String& lhs, const char* rhs) { return 0 == lhs.compare(rhs); } -static inline bool operator!= (const String& lhs, const String& rhs) { return 0 != lhs.compare(rhs); } -static inline bool operator!= (const char* lhs, const String& rhs) { return 0 != rhs.compare(lhs); } -static inline bool operator!= (const String& lhs, const char* rhs) { return 0 != lhs.compare(rhs); } -static inline bool operator< (const String& lhs, const String& rhs) { return lhs.compare(rhs) < 0; } -static inline bool operator< (const char* lhs, const String& rhs) { return rhs.compare(lhs) > 0; } -static inline bool operator< (const String& lhs, const char* rhs) { return lhs.compare(rhs) < 0; } -static inline bool operator<= (const String& lhs, const String& rhs) { return lhs.compare(rhs) <= 0; } -static inline bool operator<= (const char* lhs, const String& rhs) { return rhs.compare(lhs) >= 0; } -static inline bool operator<= (const String& lhs, const char* rhs) { return lhs.compare(rhs) <= 0; } -static inline bool operator> (const String& lhs, const String& rhs) { return lhs.compare(rhs) > 0; } -static inline bool operator> (const char* lhs, const String& rhs) { return rhs.compare(lhs) < 0; } -static inline bool operator> (const String& lhs, const char* rhs) { return lhs.compare(rhs) > 0; } -static inline bool operator>= (const String& lhs, const String& rhs) { return lhs.compare(rhs) >= 0; } -static inline bool operator>= (const char* lhs, const String& rhs) { return rhs.compare(lhs) <= 0; } -static inline bool operator>= (const String& lhs, const char* rhs) { return lhs.compare(rhs) >= 0; } - -//! @} relates cv::String +#endif // OPENCV_DISABLE_STRING_LOWER_UPPER_CONVERSIONS +//! @} core_basic } // cv -#ifndef OPENCV_NOSTL_TRANSITIONAL -namespace std -{ - static inline void swap(cv::String& a, cv::String& b) { a.swap(b); } -} -#else -namespace cv -{ - template<> inline - void swap(cv::String& a, cv::String& b) - { - a.swap(b); - } -} -#endif - -#include "opencv2/core/ptr.inl.hpp" - -#endif //__OPENCV_CORE_CVSTD_HPP__ +#endif //OPENCV_CORE_CVSTD_HPP diff --git a/IPL/include/opencv/opencv2/core/cvstd.inl.hpp b/IPL/include/opencv/opencv2/core/cvstd.inl.hpp index ad15406..37ad1e6 100644 --- a/IPL/include/opencv/opencv2/core/cvstd.inl.hpp +++ b/IPL/include/opencv/opencv2/core/cvstd.inl.hpp @@ -41,19 +41,22 @@ // //M*/ -#ifndef __OPENCV_CORE_CVSTDINL_HPP__ -#define __OPENCV_CORE_CVSTDINL_HPP__ +#ifndef OPENCV_CORE_CVSTDINL_HPP +#define OPENCV_CORE_CVSTDINL_HPP -#ifndef OPENCV_NOSTL -# include -# include -#endif +#include +#include +#include //! @cond IGNORED +#ifdef _MSC_VER +#pragma warning( push ) +#pragma warning( disable: 4127 ) +#endif + namespace cv { -#ifndef OPENCV_NOSTL template class DataType< std::complex<_Tp> > { @@ -71,103 +74,6 @@ template class DataType< std::complex<_Tp> > typedef Vec vec_type; }; -inline -String::String(const std::string& str) - : cstr_(0), len_(0) -{ - if (!str.empty()) - { - size_t len = str.size(); - memcpy(allocate(len), str.c_str(), len); - } -} - -inline -String::String(const std::string& str, size_t pos, size_t len) - : cstr_(0), len_(0) -{ - size_t strlen = str.size(); - pos = min(pos, strlen); - len = min(strlen - pos, len); - if (!len) return; - memcpy(allocate(len), str.c_str() + pos, len); -} - -inline -String& String::operator = (const std::string& str) -{ - deallocate(); - if (!str.empty()) - { - size_t len = str.size(); - memcpy(allocate(len), str.c_str(), len); - } - return *this; -} - -inline -String& String::operator += (const std::string& str) -{ - *this = *this + str; - return *this; -} - -inline -String::operator std::string() const -{ - return std::string(cstr_, len_); -} - -inline -String operator + (const String& lhs, const std::string& rhs) -{ - String s; - size_t rhslen = rhs.size(); - s.allocate(lhs.len_ + rhslen); - memcpy(s.cstr_, lhs.cstr_, lhs.len_); - memcpy(s.cstr_ + lhs.len_, rhs.c_str(), rhslen); - return s; -} - -inline -String operator + (const std::string& lhs, const String& rhs) -{ - String s; - size_t lhslen = lhs.size(); - s.allocate(lhslen + rhs.len_); - memcpy(s.cstr_, lhs.c_str(), lhslen); - memcpy(s.cstr_ + lhslen, rhs.cstr_, rhs.len_); - return s; -} - -inline -FileNode::operator std::string() const -{ - String value; - read(*this, value, value); - return value; -} - -template<> inline -void operator >> (const FileNode& n, std::string& value) -{ - String val; - read(n, val, val); - value = val; -} - -template<> inline -FileStorage& operator << (FileStorage& fs, const std::string& value) -{ - return fs << cv::String(value); -} - -static inline -std::ostream& operator << (std::ostream& os, const String& str) -{ - return os << str.c_str(); -} - static inline std::ostream& operator << (std::ostream& out, Ptr fmtd) { @@ -183,6 +89,18 @@ std::ostream& operator << (std::ostream& out, const Mat& mtx) return out << Formatter::get()->format(mtx); } +static inline +std::ostream& operator << (std::ostream& out, const UMat& m) +{ + return out << m.getMat(ACCESS_READ); +} + +template static inline +std::ostream& operator << (std::ostream& out, const Complex<_Tp>& c) +{ + return out << "(" << c.re << "," << c.im << ")"; +} + template static inline std::ostream& operator << (std::ostream& out, const std::vector >& vec) { @@ -221,14 +139,7 @@ template static inline std::ostream& operator << (std::ostream& out, const Vec<_Tp, n>& vec) { out << "["; -#ifdef _MSC_VER -#pragma warning( push ) -#pragma warning( disable: 4127 ) -#endif - if(Vec<_Tp, n>::depth < CV_32F) -#ifdef _MSC_VER -#pragma warning( pop ) -#endif + if (cv::traits::Depth<_Tp>::value <= CV_32S) { for (int i = 0; i < n - 1; ++i) { out << (int)vec[i] << ", "; @@ -258,10 +169,29 @@ std::ostream& operator << (std::ostream& out, const Rect_<_Tp>& rect) return out << "[" << rect.width << " x " << rect.height << " from (" << rect.x << ", " << rect.y << ")]"; } +static inline std::ostream& operator << (std::ostream& out, const MatSize& msize) +{ + int i, dims = msize.dims(); + for( i = 0; i < dims; i++ ) + { + out << msize[i]; + if( i < dims-1 ) + out << " x "; + } + return out; +} + +static inline std::ostream &operator<< (std::ostream &s, cv::Range &r) +{ + return s << "[" << r.start << " : " << r.end << ")"; +} -#endif // OPENCV_NOSTL } // cv +#ifdef _MSC_VER +#pragma warning( pop ) +#endif + //! @endcond -#endif // __OPENCV_CORE_CVSTDINL_HPP__ +#endif // OPENCV_CORE_CVSTDINL_HPP diff --git a/IPL/include/opencv/opencv2/core/cvstd_wrapper.hpp b/IPL/include/opencv/opencv2/core/cvstd_wrapper.hpp new file mode 100644 index 0000000..e2c2ea5 --- /dev/null +++ b/IPL/include/opencv/opencv2/core/cvstd_wrapper.hpp @@ -0,0 +1,154 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_CVSTD_WRAPPER_HPP +#define OPENCV_CORE_CVSTD_WRAPPER_HPP + +#include "opencv2/core/cvdef.h" + +#include +#include // std::shared_ptr +#include // std::enable_if + +namespace cv { + +using std::nullptr_t; + +//! @addtogroup core_basic +//! @{ + +#ifdef CV_DOXYGEN + +template using Ptr = std::shared_ptr<_Tp>; // In ideal world it should look like this, but we need some compatibility workarounds below + +template static inline +Ptr<_Tp> makePtr(const A1&... a1) { return std::make_shared<_Tp>(a1...); } + +#else // cv::Ptr with compatibility workarounds + +// It should be defined for C-API types only. +// C++ types should use regular "delete" operator. +template struct DefaultDeleter; +#if 0 +{ + void operator()(Y* p) const; +}; +#endif + +namespace sfinae { +template +struct has_parenthesis_operator +{ +private: + template + static CV_CONSTEXPR std::true_type check(typename std::is_same().operator()(std::declval()...))>::type, Ret>::type*); + + template static CV_CONSTEXPR std::false_type check(...); + + typedef decltype(check(0)) type; + +public: +#if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/) + static CV_CONSTEXPR bool value = type::value; +#else + // support MSVS 2013 + static const int value = type::value; +#endif +}; +} // namespace sfinae + +template +struct has_custom_delete + : public std::false_type {}; + +// Force has_custom_delete to std::false_type when NVCC is compiling CUDA source files +#ifndef __CUDACC__ +template +struct has_custom_delete, void, T*>::value >::type > + : public std::true_type {}; +#endif + +template +struct Ptr : public std::shared_ptr +{ +#if 0 + using std::shared_ptr::shared_ptr; // GCC 5.x can't handle this +#else + inline Ptr() CV_NOEXCEPT : std::shared_ptr() {} + inline Ptr(nullptr_t) CV_NOEXCEPT : std::shared_ptr(nullptr) {} + template inline Ptr(Y* p, D d) : std::shared_ptr(p, d) {} + template inline Ptr(nullptr_t, D d) : std::shared_ptr(nullptr, d) {} + + template inline Ptr(const Ptr& r, T* ptr) CV_NOEXCEPT : std::shared_ptr(r, ptr) {} + + inline Ptr(const Ptr& o) CV_NOEXCEPT : std::shared_ptr(o) {} + inline Ptr(Ptr&& o) CV_NOEXCEPT : std::shared_ptr(std::move(o)) {} + + template inline Ptr(const Ptr& o) CV_NOEXCEPT : std::shared_ptr(o) {} + template inline Ptr(Ptr&& o) CV_NOEXCEPT : std::shared_ptr(std::move(o)) {} +#endif + inline Ptr(const std::shared_ptr& o) CV_NOEXCEPT : std::shared_ptr(o) {} + inline Ptr(std::shared_ptr&& o) CV_NOEXCEPT : std::shared_ptr(std::move(o)) {} + + // Overload with custom DefaultDeleter: Ptr(...) + template + inline Ptr(const std::true_type&, Y* ptr) : std::shared_ptr(ptr, DefaultDeleter()) {} + + // Overload without custom deleter: Ptr(...); + template + inline Ptr(const std::false_type&, Y* ptr) : std::shared_ptr(ptr) {} + + template + inline Ptr(Y* ptr) : Ptr(has_custom_delete(), ptr) {} + + // Overload with custom DefaultDeleter: Ptr(...) + template + inline void reset(const std::true_type&, Y* ptr) { std::shared_ptr::reset(ptr, DefaultDeleter()); } + + // Overload without custom deleter: Ptr(...); + template + inline void reset(const std::false_type&, Y* ptr) { std::shared_ptr::reset(ptr); } + + template + inline void reset(Y* ptr) { Ptr::reset(has_custom_delete(), ptr); } + + template + void reset(Y* ptr, Deleter d) { std::shared_ptr::reset(ptr, d); } + + void reset() CV_NOEXCEPT { std::shared_ptr::reset(); } + + Ptr& operator=(const Ptr& o) { std::shared_ptr::operator =(o); return *this; } + template inline Ptr& operator=(const Ptr& o) { std::shared_ptr::operator =(o); return *this; } + + T* operator->() const CV_NOEXCEPT { return std::shared_ptr::get();} + typename std::add_lvalue_reference::type operator*() const CV_NOEXCEPT { return *std::shared_ptr::get(); } + + // OpenCV 3.x methods (not a part of standard C++ library) + inline void release() { std::shared_ptr::reset(); } + inline operator T* () const { return std::shared_ptr::get(); } + inline bool empty() const { return std::shared_ptr::get() == nullptr; } + + template inline + Ptr staticCast() const CV_NOEXCEPT { return std::static_pointer_cast(*this); } + + template inline + Ptr constCast() const CV_NOEXCEPT { return std::const_pointer_cast(*this); } + + template inline + Ptr dynamicCast() const CV_NOEXCEPT { return std::dynamic_pointer_cast(*this); } +}; + +template static inline +Ptr<_Tp> makePtr(const A1&... a1) +{ + static_assert( !has_custom_delete<_Tp>::value, "Can't use this makePtr with custom DefaultDeleter"); + return (Ptr<_Tp>)std::make_shared<_Tp>(a1...); +} + +#endif // CV_DOXYGEN + +//! @} core_basic +} // cv + +#endif //OPENCV_CORE_CVSTD_WRAPPER_HPP diff --git a/IPL/include/opencv/opencv2/core/detail/async_promise.hpp b/IPL/include/opencv/opencv2/core/detail/async_promise.hpp new file mode 100644 index 0000000..6eb3fb5 --- /dev/null +++ b/IPL/include/opencv/opencv2/core/detail/async_promise.hpp @@ -0,0 +1,71 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_ASYNC_PROMISE_HPP +#define OPENCV_CORE_ASYNC_PROMISE_HPP + +#include "../async.hpp" + +#include "exception_ptr.hpp" + +namespace cv { + +/** @addtogroup core_async +@{ +*/ + + +/** @brief Provides result of asynchronous operations + +*/ +class CV_EXPORTS AsyncPromise +{ +public: + ~AsyncPromise() CV_NOEXCEPT; + AsyncPromise() CV_NOEXCEPT; + explicit AsyncPromise(const AsyncPromise& o) CV_NOEXCEPT; + AsyncPromise& operator=(const AsyncPromise& o) CV_NOEXCEPT; + void release() CV_NOEXCEPT; + + /** Returns associated AsyncArray + @note Can be called once + */ + AsyncArray getArrayResult(); + + /** Stores asynchronous result. + @param[in] value result + */ + void setValue(InputArray value); + + // TODO "move" setters + +#if CV__EXCEPTION_PTR + /** Stores exception. + @param[in] exception exception to be raised in AsyncArray + */ + void setException(std::exception_ptr exception); +#endif + + /** Stores exception. + @param[in] exception exception to be raised in AsyncArray + */ + void setException(const cv::Exception& exception); + +#ifdef CV_CXX11 + explicit AsyncPromise(AsyncPromise&& o) { p = o.p; o.p = NULL; } + AsyncPromise& operator=(AsyncPromise&& o) CV_NOEXCEPT { std::swap(p, o.p); return *this; } +#endif + + + // PImpl + typedef struct AsyncArray::Impl Impl; friend struct AsyncArray::Impl; + inline void* _getImpl() const CV_NOEXCEPT { return p; } +protected: + Impl* p; +}; + + +//! @} +} // namespace +#endif // OPENCV_CORE_ASYNC_PROMISE_HPP diff --git a/IPL/include/opencv/opencv2/core/detail/exception_ptr.hpp b/IPL/include/opencv/opencv2/core/detail/exception_ptr.hpp new file mode 100644 index 0000000..d98ffc4 --- /dev/null +++ b/IPL/include/opencv/opencv2/core/detail/exception_ptr.hpp @@ -0,0 +1,27 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_DETAILS_EXCEPTION_PTR_H +#define OPENCV_CORE_DETAILS_EXCEPTION_PTR_H + +#ifndef CV__EXCEPTION_PTR +# if defined(__ANDROID__) && defined(ATOMIC_INT_LOCK_FREE) && ATOMIC_INT_LOCK_FREE < 2 +# define CV__EXCEPTION_PTR 0 // Not supported, details: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58938 +# elif defined(CV_CXX11) +# define CV__EXCEPTION_PTR 1 +# elif defined(_MSC_VER) +# define CV__EXCEPTION_PTR (_MSC_VER >= 1600) +# elif defined(__clang__) +# define CV__EXCEPTION_PTR 0 // C++11 only (see above) +# elif defined(__GNUC__) && defined(__GXX_EXPERIMENTAL_CXX0X__) +# define CV__EXCEPTION_PTR (__GXX_EXPERIMENTAL_CXX0X__ > 0) +# endif +#endif +#ifndef CV__EXCEPTION_PTR +# define CV__EXCEPTION_PTR 0 +#elif CV__EXCEPTION_PTR +# include // std::exception_ptr +#endif + +#endif // OPENCV_CORE_DETAILS_EXCEPTION_PTR_H diff --git a/IPL/include/opencv/opencv2/core/directx.hpp b/IPL/include/opencv/opencv2/core/directx.hpp index 764af74..056a85a 100644 --- a/IPL/include/opencv/opencv2/core/directx.hpp +++ b/IPL/include/opencv/opencv2/core/directx.hpp @@ -39,8 +39,8 @@ // //M*/ -#ifndef __OPENCV_CORE_DIRECTX_HPP__ -#define __OPENCV_CORE_DIRECTX_HPP__ +#ifndef OPENCV_CORE_DIRECTX_HPP +#define OPENCV_CORE_DIRECTX_HPP #include "mat.hpp" #include "ocl.hpp" @@ -181,4 +181,4 @@ CV_EXPORTS int getTypeFromD3DFORMAT(const int iD3DFORMAT); // enum D3DTYPE for D } } // namespace cv::directx -#endif // __OPENCV_CORE_DIRECTX_HPP__ +#endif // OPENCV_CORE_DIRECTX_HPP diff --git a/IPL/include/opencv/opencv2/core/eigen.hpp b/IPL/include/opencv/opencv2/core/eigen.hpp index 44df04c..741648e 100644 --- a/IPL/include/opencv/opencv2/core/eigen.hpp +++ b/IPL/include/opencv/opencv2/core/eigen.hpp @@ -42,8 +42,8 @@ //M*/ -#ifndef __OPENCV_CORE_EIGEN_HPP__ -#define __OPENCV_CORE_EIGEN_HPP__ +#ifndef OPENCV_CORE_EIGEN_HPP +#define OPENCV_CORE_EIGEN_HPP #include "opencv2/core.hpp" @@ -60,18 +60,18 @@ namespace cv //! @{ template static inline -void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src, Mat& dst ) +void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src, OutputArray dst ) { if( !(src.Flags & Eigen::RowMajorBit) ) { - Mat _src(src.cols(), src.rows(), DataType<_Tp>::type, - (void*)src.data(), src.stride()*sizeof(_Tp)); + Mat _src(src.cols(), src.rows(), traits::Type<_Tp>::value, + (void*)src.data(), src.outerStride()*sizeof(_Tp)); transpose(_src, dst); } else { - Mat _src(src.rows(), src.cols(), DataType<_Tp>::type, - (void*)src.data(), src.stride()*sizeof(_Tp)); + Mat _src(src.rows(), src.cols(), traits::Type<_Tp>::value, + (void*)src.data(), src.outerStride()*sizeof(_Tp)); _src.copyTo(dst); } } @@ -98,8 +98,8 @@ void cv2eigen( const Mat& src, CV_DbgAssert(src.rows == _rows && src.cols == _cols); if( !(dst.Flags & Eigen::RowMajorBit) ) { - const Mat _dst(src.cols, src.rows, DataType<_Tp>::type, - dst.data(), (size_t)(dst.stride()*sizeof(_Tp))); + const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); if( src.type() == _dst.type() ) transpose(src, _dst); else if( src.cols == src.rows ) @@ -112,8 +112,8 @@ void cv2eigen( const Mat& src, } else { - const Mat _dst(src.rows, src.cols, DataType<_Tp>::type, - dst.data(), (size_t)(dst.stride()*sizeof(_Tp))); + const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); src.convertTo(_dst, _dst.type()); } } @@ -125,14 +125,14 @@ void cv2eigen( const Matx<_Tp, _rows, _cols>& src, { if( !(dst.Flags & Eigen::RowMajorBit) ) { - const Mat _dst(_cols, _rows, DataType<_Tp>::type, - dst.data(), (size_t)(dst.stride()*sizeof(_Tp))); + const Mat _dst(_cols, _rows, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); transpose(src, _dst); } else { - const Mat _dst(_rows, _cols, DataType<_Tp>::type, - dst.data(), (size_t)(dst.stride()*sizeof(_Tp))); + const Mat _dst(_rows, _cols, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); Mat(src).copyTo(_dst); } } @@ -144,8 +144,8 @@ void cv2eigen( const Mat& src, dst.resize(src.rows, src.cols); if( !(dst.Flags & Eigen::RowMajorBit) ) { - const Mat _dst(src.cols, src.rows, DataType<_Tp>::type, - dst.data(), (size_t)(dst.stride()*sizeof(_Tp))); + const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); if( src.type() == _dst.type() ) transpose(src, _dst); else if( src.cols == src.rows ) @@ -158,8 +158,8 @@ void cv2eigen( const Mat& src, } else { - const Mat _dst(src.rows, src.cols, DataType<_Tp>::type, - dst.data(), (size_t)(dst.stride()*sizeof(_Tp))); + const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); src.convertTo(_dst, _dst.type()); } } @@ -172,14 +172,14 @@ void cv2eigen( const Matx<_Tp, _rows, _cols>& src, dst.resize(_rows, _cols); if( !(dst.Flags & Eigen::RowMajorBit) ) { - const Mat _dst(_cols, _rows, DataType<_Tp>::type, - dst.data(), (size_t)(dst.stride()*sizeof(_Tp))); + const Mat _dst(_cols, _rows, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); transpose(src, _dst); } else { - const Mat _dst(_rows, _cols, DataType<_Tp>::type, - dst.data(), (size_t)(dst.stride()*sizeof(_Tp))); + const Mat _dst(_rows, _cols, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); Mat(src).copyTo(_dst); } } @@ -193,8 +193,8 @@ void cv2eigen( const Mat& src, if( !(dst.Flags & Eigen::RowMajorBit) ) { - const Mat _dst(src.cols, src.rows, DataType<_Tp>::type, - dst.data(), (size_t)(dst.stride()*sizeof(_Tp))); + const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); if( src.type() == _dst.type() ) transpose(src, _dst); else @@ -202,8 +202,8 @@ void cv2eigen( const Mat& src, } else { - const Mat _dst(src.rows, src.cols, DataType<_Tp>::type, - dst.data(), (size_t)(dst.stride()*sizeof(_Tp))); + const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); src.convertTo(_dst, _dst.type()); } } @@ -217,14 +217,14 @@ void cv2eigen( const Matx<_Tp, _rows, 1>& src, if( !(dst.Flags & Eigen::RowMajorBit) ) { - const Mat _dst(1, _rows, DataType<_Tp>::type, - dst.data(), (size_t)(dst.stride()*sizeof(_Tp))); + const Mat _dst(1, _rows, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); transpose(src, _dst); } else { - const Mat _dst(_rows, 1, DataType<_Tp>::type, - dst.data(), (size_t)(dst.stride()*sizeof(_Tp))); + const Mat _dst(_rows, 1, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); src.copyTo(_dst); } } @@ -238,8 +238,8 @@ void cv2eigen( const Mat& src, dst.resize(src.cols); if( !(dst.Flags & Eigen::RowMajorBit) ) { - const Mat _dst(src.cols, src.rows, DataType<_Tp>::type, - dst.data(), (size_t)(dst.stride()*sizeof(_Tp))); + const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); if( src.type() == _dst.type() ) transpose(src, _dst); else @@ -247,8 +247,8 @@ void cv2eigen( const Mat& src, } else { - const Mat _dst(src.rows, src.cols, DataType<_Tp>::type, - dst.data(), (size_t)(dst.stride()*sizeof(_Tp))); + const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); src.convertTo(_dst, _dst.type()); } } @@ -261,14 +261,14 @@ void cv2eigen( const Matx<_Tp, 1, _cols>& src, dst.resize(_cols); if( !(dst.Flags & Eigen::RowMajorBit) ) { - const Mat _dst(_cols, 1, DataType<_Tp>::type, - dst.data(), (size_t)(dst.stride()*sizeof(_Tp))); + const Mat _dst(_cols, 1, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); transpose(src, _dst); } else { - const Mat _dst(1, _cols, DataType<_Tp>::type, - dst.data(), (size_t)(dst.stride()*sizeof(_Tp))); + const Mat _dst(1, _cols, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); Mat(src).copyTo(_dst); } } diff --git a/IPL/include/opencv/opencv2/core/fast_math.hpp b/IPL/include/opencv/opencv2/core/fast_math.hpp index b8b241b..0f53cf5 100644 --- a/IPL/include/opencv/opencv2/core/fast_math.hpp +++ b/IPL/include/opencv/opencv2/core/fast_math.hpp @@ -42,8 +42,8 @@ // //M*/ -#ifndef __OPENCV_CORE_FAST_MATH_HPP__ -#define __OPENCV_CORE_FAST_MATH_HPP__ +#ifndef OPENCV_CORE_FAST_MATH_HPP +#define OPENCV_CORE_FAST_MATH_HPP #include "opencv2/core/cvdef.h" @@ -54,34 +54,139 @@ * fast math * \****************************************************************************************/ -#if defined __BORLANDC__ -# include -#elif defined __cplusplus +#ifdef __cplusplus # include #else -# include -#endif - -#ifdef HAVE_TEGRA_OPTIMIZATION -# include "tegra_round.hpp" +# ifdef __BORLANDC__ +# include +# else +# include +# endif #endif -#if CV_VFP +#if defined(__CUDACC__) + // nothing, intrinsics/asm code is not supported +#else + #if ((defined _MSC_VER && defined _M_X64) \ + || (defined __GNUC__ && defined __x86_64__ && defined __SSE2__)) \ + && !defined(OPENCV_SKIP_INCLUDE_EMMINTRIN_H) + #include + #endif + + #if defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 \ + && !defined(OPENCV_SKIP_INCLUDE_ALTIVEC_H) + #include + #endif + + #if defined(CV_INLINE_ROUND_FLT) + // user-specified version + // CV_INLINE_ROUND_DBL should be defined too + #elif defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ // 1. general scheme #define ARM_ROUND(_value, _asm_string) \ int res; \ float temp; \ - asm(_asm_string : [res] "=r" (res), [temp] "=w" (temp) : [value] "w" (_value)); \ + CV_UNUSED(temp); \ + __asm__(_asm_string : [res] "=r" (res), [temp] "=w" (temp) : [value] "w" (_value)); \ return res // 2. version for double #ifdef __clang__ - #define ARM_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %[value] \n vmov %[res], %[temp]") + #define CV_INLINE_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %[value] \n vmov %[res], %[temp]") #else - #define ARM_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %P[value] \n vmov %[res], %[temp]") + #define CV_INLINE_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %P[value] \n vmov %[res], %[temp]") #endif // 3. version for float - #define ARM_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]") -#endif // CV_VFP + #define CV_INLINE_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]") + #elif defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 + // P8 and newer machines can convert fp32/64 to int quickly. + #define CV_INLINE_ROUND_DBL(value) \ + int out; \ + double temp; \ + __asm__( "fctiw %[temp],%[in]\n\tmfvsrwz %[out],%[temp]\n\t" : [out] "=r" (out), [temp] "=d" (temp) : [in] "d" ((double)(value)) : ); \ + return out; + + // FP32 also works with FP64 routine above + #define CV_INLINE_ROUND_FLT(value) CV_INLINE_ROUND_DBL(value) + #endif + + #ifdef CV_INLINE_ISINF_FLT + // user-specified version + // CV_INLINE_ISINF_DBL should be defined too + #elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class) + #define CV_INLINE_ISINF_DBL(value) return scalar_test_data_class(value, 0x30); + #define CV_INLINE_ISINF_FLT(value) CV_INLINE_ISINF_DBL(value) + #endif + + #ifdef CV_INLINE_ISNAN_FLT + // user-specified version + // CV_INLINE_ISNAN_DBL should be defined too + #elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class) + #define CV_INLINE_ISNAN_DBL(value) return scalar_test_data_class(value, 0x40); + #define CV_INLINE_ISNAN_FLT(value) CV_INLINE_ISNAN_DBL(value) + #endif + + #if !defined(OPENCV_USE_FASTMATH_BUILTINS) \ + && ( \ + defined(__x86_64__) || defined(__i686__) \ + || defined(__arm__) \ + || defined(__PPC64__) \ + ) + /* Let builtin C math functions when available. Dedicated hardware is available to + round and convert FP values. */ + #define OPENCV_USE_FASTMATH_BUILTINS 1 + #endif + + /* Enable builtin math functions if possible, desired, and available. + Note, not all math functions inline equally. E.g lrint will not inline + without the -fno-math-errno option. */ + #if defined(CV_ICC) + // nothing + #elif defined(OPENCV_USE_FASTMATH_BUILTINS) && OPENCV_USE_FASTMATH_BUILTINS + #if defined(__clang__) + #define CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS + #if !defined(CV_INLINE_ISNAN_DBL) && __has_builtin(__builtin_isnan) + #define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value); + #endif + #if !defined(CV_INLINE_ISNAN_FLT) && __has_builtin(__builtin_isnan) + #define CV_INLINE_ISNAN_FLT(value) return __builtin_isnan(value); + #endif + #if !defined(CV_INLINE_ISINF_DBL) && __has_builtin(__builtin_isinf) + #define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value); + #endif + #if !defined(CV_INLINE_ISINF_FLT) && __has_builtin(__builtin_isinf) + #define CV_INLINE_ISINF_FLT(value) return __builtin_isinf(value); + #endif + #elif defined(__GNUC__) + #define CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS + #if !defined(CV_INLINE_ISNAN_DBL) + #define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value); + #endif + #if !defined(CV_INLINE_ISNAN_FLT) + #define CV_INLINE_ISNAN_FLT(value) return __builtin_isnanf(value); + #endif + #if !defined(CV_INLINE_ISINF_DBL) + #define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value); + #endif + #if !defined(CV_INLINE_ISINF_FLT) + #define CV_INLINE_ISINF_FLT(value) return __builtin_isinff(value); + #endif + #elif defined(_MSC_VER) + #if !defined(CV_INLINE_ISNAN_DBL) + #define CV_INLINE_ISNAN_DBL(value) return isnan(value); + #endif + #if !defined(CV_INLINE_ISNAN_FLT) + #define CV_INLINE_ISNAN_FLT(value) return isnan(value); + #endif + #if !defined(CV_INLINE_ISINF_DBL) + #define CV_INLINE_ISINF_DBL(value) return isinf(value); + #endif + #if !defined(CV_INLINE_ISINF_FLT) + #define CV_INLINE_ISINF_FLT(value) return isinf(value); + #endif + #endif + #endif + +#endif // defined(__CUDACC__) /** @brief Rounds floating-point number to the nearest integer @@ -91,8 +196,11 @@ CV_INLINE int cvRound( double value ) { -#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ - && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) +#if defined CV_INLINE_ROUND_DBL + CV_INLINE_ROUND_DBL(value); +#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ + && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \ + && !defined(__CUDACC__) __m128d t = _mm_set_sd( value ); return _mm_cvtsd_si32(t); #elif defined _MSC_VER && defined _M_IX86 @@ -103,15 +211,8 @@ cvRound( double value ) fistp t; } return t; -#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \ - defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION - TEGRA_ROUND_DBL(value); #elif defined CV_ICC || defined __GNUC__ -# if CV_VFP - ARM_ROUND_DBL(value); -# else - return (int)lrint(value); -# endif + return (int)(lrint(value)); #else /* it's ok if round does not comply with IEEE754 standard; the tests should allow +/-1 difference when the tested functions use round */ @@ -129,17 +230,14 @@ cvRound( double value ) */ CV_INLINE int cvFloor( double value ) { -#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) - __m128d t = _mm_set_sd( value ); - int i = _mm_cvtsd_si32(t); - return i - _mm_movemask_pd(_mm_cmplt_sd(t, _mm_cvtsi32_sd(t,i))); -#elif defined __GNUC__ +#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \ + && ( \ + defined(__PPC64__) \ + ) + return __builtin_floor(value); +#else int i = (int)value; return i - (i > value); -#else - int i = cvRound(value); - float diff = (float)(value - i); - return i - (diff < 0); #endif } @@ -152,17 +250,14 @@ CV_INLINE int cvFloor( double value ) */ CV_INLINE int cvCeil( double value ) { -#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__) - __m128d t = _mm_set_sd( value ); - int i = _mm_cvtsd_si32(t); - return i + _mm_movemask_pd(_mm_cmplt_sd(_mm_cvtsi32_sd(t,i), t)); -#elif defined __GNUC__ +#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \ + && ( \ + defined(__PPC64__) \ + ) + return __builtin_ceil(value); +#else int i = (int)value; return i + (i < value); -#else - int i = cvRound(value); - float diff = (float)(i - value); - return i + (diff < 0); #endif } @@ -174,10 +269,14 @@ CV_INLINE int cvCeil( double value ) otherwise. */ CV_INLINE int cvIsNaN( double value ) { +#if defined CV_INLINE_ISNAN_DBL + CV_INLINE_ISNAN_DBL(value); +#else Cv64suf ieee754; ieee754.f = value; return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) + ((unsigned)ieee754.u != 0) > 0x7ff00000; +#endif } /** @brief Determines if the argument is Infinity. @@ -188,10 +287,19 @@ CV_INLINE int cvIsNaN( double value ) and 0 otherwise. */ CV_INLINE int cvIsInf( double value ) { +#if defined CV_INLINE_ISINF_DBL + CV_INLINE_ISINF_DBL(value); +#elif defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || defined(_M_ARM64) || defined(__PPC64__) + Cv64suf ieee754; + ieee754.f = value; + return (ieee754.u & 0x7fffffff00000000) == + 0x7ff0000000000000; +#else Cv64suf ieee754; ieee754.f = value; return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) == 0x7ff00000 && (unsigned)ieee754.u == 0; +#endif } #ifdef __cplusplus @@ -199,8 +307,11 @@ CV_INLINE int cvIsInf( double value ) /** @overload */ CV_INLINE int cvRound(float value) { -#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ && \ - defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) +#if defined CV_INLINE_ROUND_FLT + CV_INLINE_ROUND_FLT(value); +#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ + && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \ + && !defined(__CUDACC__) __m128 t = _mm_set_ss( value ); return _mm_cvtss_si32(t); #elif defined _MSC_VER && defined _M_IX86 @@ -211,15 +322,8 @@ CV_INLINE int cvRound(float value) fistp t; } return t; -#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \ - defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION - TEGRA_ROUND_FLT(value); #elif defined CV_ICC || defined __GNUC__ -# if CV_VFP - ARM_ROUND_FLT(value); -# else - return (int)lrintf(value); -# endif + return (int)(lrintf(value)); #else /* it's ok if round does not comply with IEEE754 standard; the tests should allow +/-1 difference when the tested functions use round */ @@ -236,17 +340,14 @@ CV_INLINE int cvRound( int value ) /** @overload */ CV_INLINE int cvFloor( float value ) { -#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) - __m128 t = _mm_set_ss( value ); - int i = _mm_cvtss_si32(t); - return i - _mm_movemask_ps(_mm_cmplt_ss(t, _mm_cvtsi32_ss(t,i))); -#elif defined __GNUC__ +#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \ + && ( \ + defined(__PPC64__) \ + ) + return __builtin_floorf(value); +#else int i = (int)value; return i - (i > value); -#else - int i = cvRound(value); - float diff = (float)(value - i); - return i - (diff < 0); #endif } @@ -259,17 +360,14 @@ CV_INLINE int cvFloor( int value ) /** @overload */ CV_INLINE int cvCeil( float value ) { -#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__) - __m128 t = _mm_set_ss( value ); - int i = _mm_cvtss_si32(t); - return i + _mm_movemask_ps(_mm_cmplt_ss(_mm_cvtsi32_ss(t,i), t)); -#elif defined __GNUC__ +#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \ + && ( \ + defined(__PPC64__) \ + ) + return __builtin_ceilf(value); +#else int i = (int)value; return i + (i < value); -#else - int i = cvRound(value); - float diff = (float)(i - value); - return i + (diff < 0); #endif } @@ -282,17 +380,25 @@ CV_INLINE int cvCeil( int value ) /** @overload */ CV_INLINE int cvIsNaN( float value ) { +#if defined CV_INLINE_ISNAN_FLT + CV_INLINE_ISNAN_FLT(value); +#else Cv32suf ieee754; ieee754.f = value; return (ieee754.u & 0x7fffffff) > 0x7f800000; +#endif } /** @overload */ CV_INLINE int cvIsInf( float value ) { +#if defined CV_INLINE_ISINF_FLT + CV_INLINE_ISINF_FLT(value); +#else Cv32suf ieee754; ieee754.f = value; return (ieee754.u & 0x7fffffff) == 0x7f800000; +#endif } #endif // __cplusplus diff --git a/IPL/include/opencv/opencv2/core/hal/hal.hpp b/IPL/include/opencv/opencv2/core/hal/hal.hpp index 118913e..0d68078 100644 --- a/IPL/include/opencv/opencv2/core/hal/hal.hpp +++ b/IPL/include/opencv/opencv2/core/hal/hal.hpp @@ -42,23 +42,13 @@ // //M*/ -#ifndef __OPENCV_HAL_HPP__ -#define __OPENCV_HAL_HPP__ +#ifndef OPENCV_HAL_HPP +#define OPENCV_HAL_HPP #include "opencv2/core/cvdef.h" +#include "opencv2/core/cvstd.hpp" #include "opencv2/core/hal/interface.h" -//! @cond IGNORED -#define CALL_HAL(name, fun, ...) \ - int res = fun(__VA_ARGS__); \ - if (res == CV_HAL_ERROR_OK) \ - return; \ - else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED) \ - CV_Error_(cv::Error::StsInternal, \ - ("HAL implementation " CVAUX_STR(name) " ==> " CVAUX_STR(fun) " returned %d (0x%08x)", res, res)); -//! @endcond - - namespace cv { namespace hal { //! @addtogroup core_hal_functions @@ -74,6 +64,23 @@ CV_EXPORTS int LU32f(float* A, size_t astep, int m, float* b, size_t bstep, int CV_EXPORTS int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n); CV_EXPORTS bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n); CV_EXPORTS bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n); +CV_EXPORTS void SVD32f(float* At, size_t astep, float* W, float* U, size_t ustep, float* Vt, size_t vstep, int m, int n, int flags); +CV_EXPORTS void SVD64f(double* At, size_t astep, double* W, double* U, size_t ustep, double* Vt, size_t vstep, int m, int n, int flags); +CV_EXPORTS int QR32f(float* A, size_t astep, int m, int n, int k, float* b, size_t bstep, float* hFactors); +CV_EXPORTS int QR64f(double* A, size_t astep, int m, int n, int k, double* b, size_t bstep, double* hFactors); + +CV_EXPORTS void gemm32f(const float* src1, size_t src1_step, const float* src2, size_t src2_step, + float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step, + int m_a, int n_a, int n_d, int flags); +CV_EXPORTS void gemm64f(const double* src1, size_t src1_step, const double* src2, size_t src2_step, + double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step, + int m_a, int n_a, int n_d, int flags); +CV_EXPORTS void gemm32fc(const float* src1, size_t src1_step, const float* src2, size_t src2_step, + float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step, + int m_a, int n_a, int n_d, int flags); +CV_EXPORTS void gemm64fc(const double* src1, size_t src1_step, const double* src2, size_t src2_step, + double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step, + int m_a, int n_a, int n_d, int flags); CV_EXPORTS int normL1_(const uchar* a, const uchar* b, int n); CV_EXPORTS float normL1_(const float* a, const float* b, int n); @@ -84,7 +91,8 @@ CV_EXPORTS void exp64f(const double* src, double* dst, int n); CV_EXPORTS void log32f(const float* src, float* dst, int n); CV_EXPORTS void log64f(const double* src, double* dst, int n); -CV_EXPORTS void fastAtan2(const float* y, const float* x, float* dst, int n, bool angleInDegrees); +CV_EXPORTS void fastAtan32f(const float* y, const float* x, float* dst, int n, bool angleInDegrees); +CV_EXPORTS void fastAtan64f(const double* y, const double* x, double* dst, int n, bool angleInDegrees); CV_EXPORTS void magnitude32f(const float* x, const float* y, float* dst, int n); CV_EXPORTS void magnitude64f(const double* x, const double* y, double* dst, int n); CV_EXPORTS void sqrt32f(const float* src, float* dst, int len); @@ -171,13 +179,13 @@ CV_EXPORTS void div32s( const int* src1, size_t step1, const int* src2, size_t s CV_EXPORTS void div32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale); CV_EXPORTS void div64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale); -CV_EXPORTS void recip8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale); -CV_EXPORTS void recip8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale); -CV_EXPORTS void recip16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale); -CV_EXPORTS void recip16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale); -CV_EXPORTS void recip32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale); -CV_EXPORTS void recip32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale); -CV_EXPORTS void recip64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip8u( const uchar *, size_t, const uchar * src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip8s( const schar *, size_t, const schar * src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip16u( const ushort *, size_t, const ushort * src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip16s( const short *, size_t, const short * src2, size_t step2, short* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip32s( const int *, size_t, const int * src2, size_t step2, int* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip32f( const float *, size_t, const float * src2, size_t step2, float* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip64f( const double *, size_t, const double * src2, size_t step2, double* dst, size_t step, int width, int height, void* scale); CV_EXPORTS void addWeighted8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _scalars ); CV_EXPORTS void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scalars ); @@ -187,6 +195,35 @@ CV_EXPORTS void addWeighted32s( const int* src1, size_t step1, const int* src2, CV_EXPORTS void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars ); CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars ); +CV_EXPORTS void cvt16f32f( const float16_t* src, float* dst, int len ); +CV_EXPORTS void cvt32f16f( const float* src, float16_t* dst, int len ); + +CV_EXPORTS void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len ); +CV_EXPORTS void addRNGBias64f( double* arr, const double* scaleBiasPairs, int len ); + +struct CV_EXPORTS DFT1D +{ + static Ptr create(int len, int count, int depth, int flags, bool * useBuffer = 0); + virtual void apply(const uchar *src, uchar *dst) = 0; + virtual ~DFT1D() {} +}; + +struct CV_EXPORTS DFT2D +{ + static Ptr create(int width, int height, int depth, + int src_channels, int dst_channels, + int flags, int nonzero_rows = 0); + virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0; + virtual ~DFT2D() {} +}; + +struct CV_EXPORTS DCT2D +{ + static Ptr create(int width, int height, int depth, int flags); + virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0; + virtual ~DCT2D() {} +}; + //! @} core_hal //============================================================================= @@ -204,6 +241,7 @@ CV_EXPORTS void exp(const double* src, double* dst, int n); CV_EXPORTS void log(const float* src, float* dst, int n); CV_EXPORTS void log(const double* src, double* dst, int n); +CV_EXPORTS void fastAtan2(const float* y, const float* x, float* dst, int n, bool angleInDegrees); CV_EXPORTS void magnitude(const float* x, const float* y, float* dst, int n); CV_EXPORTS void magnitude(const double* x, const double* y, double* dst, int n); CV_EXPORTS void sqrt(const float* src, float* dst, int len); @@ -215,4 +253,4 @@ CV_EXPORTS void invSqrt(const double* src, double* dst, int len); }} //cv::hal -#endif //__OPENCV_HAL_HPP__ +#endif //OPENCV_HAL_HPP diff --git a/IPL/include/opencv/opencv2/core/hal/interface.h b/IPL/include/opencv/opencv2/core/hal/interface.h index 51f7606..6f0a83d 100644 --- a/IPL/include/opencv/opencv2/core/hal/interface.h +++ b/IPL/include/opencv/opencv2/core/hal/interface.h @@ -1,42 +1,42 @@ -#ifndef _HAL_INTERFACE_HPP_INCLUDED_ -#define _HAL_INTERFACE_HPP_INCLUDED_ +#ifndef OPENCV_CORE_HAL_INTERFACE_H +#define OPENCV_CORE_HAL_INTERFACE_H //! @addtogroup core_hal_interface //! @{ +//! @name Return codes +//! @{ #define CV_HAL_ERROR_OK 0 #define CV_HAL_ERROR_NOT_IMPLEMENTED 1 #define CV_HAL_ERROR_UNKNOWN -1 - -#define CV_HAL_CMP_EQ 0 -#define CV_HAL_CMP_GT 1 -#define CV_HAL_CMP_GE 2 -#define CV_HAL_CMP_LT 3 -#define CV_HAL_CMP_LE 4 -#define CV_HAL_CMP_NE 5 +//! @} #ifdef __cplusplus #include #else #include +#include #endif -/* primitive types */ -/* - schar - signed 1 byte integer - uchar - unsigned 1 byte integer - short - signed 2 byte integer - ushort - unsigned 2 byte integer - int - signed 4 byte integer - uint - unsigned 4 byte integer - int64 - signed 8 byte integer - uint64 - unsigned 8 byte integer -*/ - +//! @name Data types +//! primitive types +//! - schar - signed 1 byte integer +//! - uchar - unsigned 1 byte integer +//! - short - signed 2 byte integer +//! - ushort - unsigned 2 byte integer +//! - int - signed 4 byte integer +//! - uint - unsigned 4 byte integer +//! - int64 - signed 8 byte integer +//! - uint64 - unsigned 8 byte integer +//! @{ #if !defined _MSC_VER && !defined __BORLANDC__ # if defined __cplusplus && __cplusplus >= 201103L && !defined __APPLE__ # include - typedef std::uint32_t uint; +# ifdef __NEWLIB__ + typedef unsigned int uint; +# else + typedef std::uint32_t uint; +# endif # else # include typedef uint32_t uint; @@ -64,6 +64,127 @@ typedef signed char schar; # define CV_BIG_UINT(n) n##ULL #endif +#define CV_USRTYPE1 (void)"CV_USRTYPE1 support has been dropped in OpenCV 4.0" + +#define CV_CN_MAX 512 +#define CV_CN_SHIFT 3 +#define CV_DEPTH_MAX (1 << CV_CN_SHIFT) + +#define CV_8U 0 +#define CV_8S 1 +#define CV_16U 2 +#define CV_16S 3 +#define CV_32S 4 +#define CV_32F 5 +#define CV_64F 6 +#define CV_16F 7 + +#define CV_MAT_DEPTH_MASK (CV_DEPTH_MAX - 1) +#define CV_MAT_DEPTH(flags) ((flags) & CV_MAT_DEPTH_MASK) + +#define CV_MAKETYPE(depth,cn) (CV_MAT_DEPTH(depth) + (((cn)-1) << CV_CN_SHIFT)) +#define CV_MAKE_TYPE CV_MAKETYPE + +#define CV_8UC1 CV_MAKETYPE(CV_8U,1) +#define CV_8UC2 CV_MAKETYPE(CV_8U,2) +#define CV_8UC3 CV_MAKETYPE(CV_8U,3) +#define CV_8UC4 CV_MAKETYPE(CV_8U,4) +#define CV_8UC(n) CV_MAKETYPE(CV_8U,(n)) + +#define CV_8SC1 CV_MAKETYPE(CV_8S,1) +#define CV_8SC2 CV_MAKETYPE(CV_8S,2) +#define CV_8SC3 CV_MAKETYPE(CV_8S,3) +#define CV_8SC4 CV_MAKETYPE(CV_8S,4) +#define CV_8SC(n) CV_MAKETYPE(CV_8S,(n)) + +#define CV_16UC1 CV_MAKETYPE(CV_16U,1) +#define CV_16UC2 CV_MAKETYPE(CV_16U,2) +#define CV_16UC3 CV_MAKETYPE(CV_16U,3) +#define CV_16UC4 CV_MAKETYPE(CV_16U,4) +#define CV_16UC(n) CV_MAKETYPE(CV_16U,(n)) + +#define CV_16SC1 CV_MAKETYPE(CV_16S,1) +#define CV_16SC2 CV_MAKETYPE(CV_16S,2) +#define CV_16SC3 CV_MAKETYPE(CV_16S,3) +#define CV_16SC4 CV_MAKETYPE(CV_16S,4) +#define CV_16SC(n) CV_MAKETYPE(CV_16S,(n)) + +#define CV_32SC1 CV_MAKETYPE(CV_32S,1) +#define CV_32SC2 CV_MAKETYPE(CV_32S,2) +#define CV_32SC3 CV_MAKETYPE(CV_32S,3) +#define CV_32SC4 CV_MAKETYPE(CV_32S,4) +#define CV_32SC(n) CV_MAKETYPE(CV_32S,(n)) + +#define CV_32FC1 CV_MAKETYPE(CV_32F,1) +#define CV_32FC2 CV_MAKETYPE(CV_32F,2) +#define CV_32FC3 CV_MAKETYPE(CV_32F,3) +#define CV_32FC4 CV_MAKETYPE(CV_32F,4) +#define CV_32FC(n) CV_MAKETYPE(CV_32F,(n)) + +#define CV_64FC1 CV_MAKETYPE(CV_64F,1) +#define CV_64FC2 CV_MAKETYPE(CV_64F,2) +#define CV_64FC3 CV_MAKETYPE(CV_64F,3) +#define CV_64FC4 CV_MAKETYPE(CV_64F,4) +#define CV_64FC(n) CV_MAKETYPE(CV_64F,(n)) + +#define CV_16FC1 CV_MAKETYPE(CV_16F,1) +#define CV_16FC2 CV_MAKETYPE(CV_16F,2) +#define CV_16FC3 CV_MAKETYPE(CV_16F,3) +#define CV_16FC4 CV_MAKETYPE(CV_16F,4) +#define CV_16FC(n) CV_MAKETYPE(CV_16F,(n)) +//! @} + +//! @name Comparison operation +//! @sa cv::CmpTypes +//! @{ +#define CV_HAL_CMP_EQ 0 +#define CV_HAL_CMP_GT 1 +#define CV_HAL_CMP_GE 2 +#define CV_HAL_CMP_LT 3 +#define CV_HAL_CMP_LE 4 +#define CV_HAL_CMP_NE 5 +//! @} + +//! @name Border processing modes +//! @sa cv::BorderTypes +//! @{ +#define CV_HAL_BORDER_CONSTANT 0 +#define CV_HAL_BORDER_REPLICATE 1 +#define CV_HAL_BORDER_REFLECT 2 +#define CV_HAL_BORDER_WRAP 3 +#define CV_HAL_BORDER_REFLECT_101 4 +#define CV_HAL_BORDER_TRANSPARENT 5 +#define CV_HAL_BORDER_ISOLATED 16 +//! @} + +//! @name DFT flags +//! @{ +#define CV_HAL_DFT_INVERSE 1 +#define CV_HAL_DFT_SCALE 2 +#define CV_HAL_DFT_ROWS 4 +#define CV_HAL_DFT_COMPLEX_OUTPUT 16 +#define CV_HAL_DFT_REAL_OUTPUT 32 +#define CV_HAL_DFT_TWO_STAGE 64 +#define CV_HAL_DFT_STAGE_COLS 128 +#define CV_HAL_DFT_IS_CONTINUOUS 512 +#define CV_HAL_DFT_IS_INPLACE 1024 +//! @} + +//! @name SVD flags +//! @{ +#define CV_HAL_SVD_NO_UV 1 +#define CV_HAL_SVD_SHORT_UV 2 +#define CV_HAL_SVD_MODIFY_A 4 +#define CV_HAL_SVD_FULL_UV 8 +//! @} + +//! @name Gemm flags +//! @{ +#define CV_HAL_GEMM_1_T 1 +#define CV_HAL_GEMM_2_T 2 +#define CV_HAL_GEMM_3_T 4 +//! @} + //! @} #endif diff --git a/IPL/include/opencv/opencv2/core/hal/intrin.hpp b/IPL/include/opencv/opencv2/core/hal/intrin.hpp index 33e14b4..52f6b5d 100644 --- a/IPL/include/opencv/opencv2/core/hal/intrin.hpp +++ b/IPL/include/opencv/opencv2/core/hal/intrin.hpp @@ -42,8 +42,8 @@ // //M*/ -#ifndef __OPENCV_HAL_INTRIN_HPP__ -#define __OPENCV_HAL_INTRIN_HPP__ +#ifndef OPENCV_HAL_INTRIN_HPP +#define OPENCV_HAL_INTRIN_HPP #include #include @@ -55,266 +55,466 @@ #define OPENCV_HAL_NOP(a) (a) #define OPENCV_HAL_1ST(a, b) (a) +namespace { +inline unsigned int trailingZeros32(unsigned int value) { +#if defined(_MSC_VER) +#if (_MSC_VER < 1700) || defined(_M_ARM) || defined(_M_ARM64) + unsigned long index = 0; + _BitScanForward(&index, value); + return (unsigned int)index; +#elif defined(__clang__) + // clang-cl doesn't export _tzcnt_u32 for non BMI systems + return value ? __builtin_ctz(value) : 32; +#else + return _tzcnt_u32(value); +#endif +#elif defined(__GNUC__) || defined(__GNUG__) + return __builtin_ctz(value); +#elif defined(__ICC) || defined(__INTEL_COMPILER) + return _bit_scan_forward(value); +#elif defined(__clang__) + return llvm.cttz.i32(value, true); +#else + static const int MultiplyDeBruijnBitPosition[32] = { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 }; + return MultiplyDeBruijnBitPosition[((uint32_t)((value & -value) * 0x077CB531U)) >> 27]; +#endif +} +} + // unlike HAL API, which is in cv::hal, // we put intrinsics into cv namespace to make its // access from within opencv code more accessible namespace cv { -//! @addtogroup core_hal_intrin -//! @{ +namespace hal { -//! @cond IGNORED -template struct V_TypeTraits +enum StoreMode { - typedef _Tp int_type; - typedef _Tp uint_type; - typedef _Tp abs_type; - typedef _Tp sum_type; + STORE_UNALIGNED = 0, + STORE_ALIGNED = 1, + STORE_ALIGNED_NOCACHE = 2 +}; - enum { delta = 0, shift = 0 }; +} - static int_type reinterpret_int(_Tp x) { return x; } - static uint_type reinterpet_uint(_Tp x) { return x; } - static _Tp reinterpret_from_int(int_type x) { return (_Tp)x; } +// TODO FIXIT: Don't use "God" traits. Split on separate cases. +template struct V_TypeTraits +{ }; -template<> struct V_TypeTraits -{ - typedef uchar value_type; - typedef schar int_type; - typedef uchar uint_type; - typedef uchar abs_type; - typedef int sum_type; +#define CV_INTRIN_DEF_TYPE_TRAITS(type, int_type_, uint_type_, abs_type_, w_type_, q_type_, sum_type_, nlanes128_) \ + template<> struct V_TypeTraits \ + { \ + typedef type value_type; \ + typedef int_type_ int_type; \ + typedef abs_type_ abs_type; \ + typedef uint_type_ uint_type; \ + typedef w_type_ w_type; \ + typedef q_type_ q_type; \ + typedef sum_type_ sum_type; \ + enum { nlanes128 = nlanes128_ }; \ + \ + static inline int_type reinterpret_int(type x) \ + { \ + union { type l; int_type i; } v; \ + v.l = x; \ + return v.i; \ + } \ + \ + static inline type reinterpret_from_int(int_type x) \ + { \ + union { type l; int_type i; } v; \ + v.i = x; \ + return v.l; \ + } \ + } - typedef ushort w_type; - typedef unsigned q_type; +#define CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(type, int_type_, uint_type_, abs_type_, w_type_, sum_type_, nlanes128_) \ + template<> struct V_TypeTraits \ + { \ + typedef type value_type; \ + typedef int_type_ int_type; \ + typedef abs_type_ abs_type; \ + typedef uint_type_ uint_type; \ + typedef w_type_ w_type; \ + typedef sum_type_ sum_type; \ + enum { nlanes128 = nlanes128_ }; \ + \ + static inline int_type reinterpret_int(type x) \ + { \ + union { type l; int_type i; } v; \ + v.l = x; \ + return v.i; \ + } \ + \ + static inline type reinterpret_from_int(int_type x) \ + { \ + union { type l; int_type i; } v; \ + v.i = x; \ + return v.l; \ + } \ + } - enum { delta = 128, shift = 8 }; +CV_INTRIN_DEF_TYPE_TRAITS(uchar, schar, uchar, uchar, ushort, unsigned, unsigned, 16); +CV_INTRIN_DEF_TYPE_TRAITS(schar, schar, uchar, uchar, short, int, int, 16); +CV_INTRIN_DEF_TYPE_TRAITS(ushort, short, ushort, ushort, unsigned, uint64, unsigned, 8); +CV_INTRIN_DEF_TYPE_TRAITS(short, short, ushort, ushort, int, int64, int, 8); +CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(unsigned, int, unsigned, unsigned, uint64, unsigned, 4); +CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int, int, unsigned, unsigned, int64, int, 4); +CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(float, int, unsigned, float, double, float, 4); +CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(uint64, int64, uint64, uint64, void, uint64, 2); +CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int64, int64, uint64, uint64, void, int64, 2); +CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(double, int64, uint64, double, void, double, 2); + +#ifndef CV_DOXYGEN + +#ifndef CV_CPU_OPTIMIZATION_HAL_NAMESPACE +#ifdef CV_FORCE_SIMD128_CPP + #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_EMULATOR_CPP + #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_EMULATOR_CPP { + #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END } +#elif defined(CV_CPU_DISPATCH_MODE) + #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) + #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) { + #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END } +#else + #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_baseline + #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline { + #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END } +#endif +#endif // CV_CPU_OPTIMIZATION_HAL_NAMESPACE - static int_type reinterpret_int(value_type x) { return (int_type)x; } - static uint_type reinterpret_uint(value_type x) { return (uint_type)x; } - static value_type reinterpret_from_int(int_type x) { return (value_type)x; } -}; +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END +using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE; +#endif +} -template<> struct V_TypeTraits -{ - typedef schar value_type; - typedef schar int_type; - typedef uchar uint_type; - typedef uchar abs_type; - typedef int sum_type; +#ifdef CV_DOXYGEN +# undef CV_AVX2 +# undef CV_SSE2 +# undef CV_NEON +# undef CV_VSX +# undef CV_FP16 +# undef CV_MSA +#endif - typedef short w_type; - typedef int q_type; +#if (CV_SSE2 || CV_NEON || CV_VSX || CV_MSA || CV_WASM_SIMD) && !defined(CV_FORCE_SIMD128_CPP) +#define CV__SIMD_FORWARD 128 +#include "opencv2/core/hal/intrin_forward.hpp" +#endif - enum { delta = 128, shift = 8 }; +#if CV_SSE2 && !defined(CV_FORCE_SIMD128_CPP) - static int_type reinterpret_int(value_type x) { return (int_type)x; } - static uint_type reinterpret_uint(value_type x) { return (uint_type)x; } - static value_type reinterpret_from_int(int_type x) { return (value_type)x; } -}; +#include "opencv2/core/hal/intrin_sse_em.hpp" +#include "opencv2/core/hal/intrin_sse.hpp" -template<> struct V_TypeTraits -{ - typedef ushort value_type; - typedef short int_type; - typedef ushort uint_type; - typedef ushort abs_type; - typedef int sum_type; +#elif CV_NEON && !defined(CV_FORCE_SIMD128_CPP) - typedef unsigned w_type; - typedef uchar nu_type; +#include "opencv2/core/hal/intrin_neon.hpp" - enum { delta = 32768, shift = 16 }; +#elif CV_VSX && !defined(CV_FORCE_SIMD128_CPP) - static int_type reinterpret_int(value_type x) { return (int_type)x; } - static uint_type reinterpret_uint(value_type x) { return (uint_type)x; } - static value_type reinterpret_from_int(int_type x) { return (value_type)x; } -}; +#include "opencv2/core/hal/intrin_vsx.hpp" -template<> struct V_TypeTraits -{ - typedef short value_type; - typedef short int_type; - typedef ushort uint_type; - typedef ushort abs_type; - typedef int sum_type; +#elif CV_MSA && !defined(CV_FORCE_SIMD128_CPP) - typedef int w_type; - typedef uchar nu_type; - typedef schar n_type; +#include "opencv2/core/hal/intrin_msa.hpp" - enum { delta = 128, shift = 8 }; +#elif CV_WASM_SIMD && !defined(CV_FORCE_SIMD128_CPP) +#include "opencv2/core/hal/intrin_wasm.hpp" - static int_type reinterpret_int(value_type x) { return (int_type)x; } - static uint_type reinterpret_uint(value_type x) { return (uint_type)x; } - static value_type reinterpret_from_int(int_type x) { return (value_type)x; } -}; +#else -template<> struct V_TypeTraits -{ - typedef unsigned value_type; - typedef int int_type; - typedef unsigned uint_type; - typedef unsigned abs_type; - typedef unsigned sum_type; - - typedef uint64 w_type; - typedef ushort nu_type; - - static int_type reinterpret_int(value_type x) { return (int_type)x; } - static uint_type reinterpret_uint(value_type x) { return (uint_type)x; } - static value_type reinterpret_from_int(int_type x) { return (value_type)x; } -}; +#include "opencv2/core/hal/intrin_cpp.hpp" -template<> struct V_TypeTraits -{ - typedef int value_type; - typedef int int_type; - typedef unsigned uint_type; - typedef unsigned abs_type; - typedef int sum_type; - - typedef int64 w_type; - typedef short n_type; - typedef ushort nu_type; - - static int_type reinterpret_int(value_type x) { return (int_type)x; } - static uint_type reinterpret_uint(value_type x) { return (uint_type)x; } - static value_type reinterpret_from_int(int_type x) { return (value_type)x; } -}; +#endif -template<> struct V_TypeTraits -{ - typedef uint64 value_type; - typedef int64 int_type; - typedef uint64 uint_type; - typedef uint64 abs_type; - typedef uint64 sum_type; +// AVX2 can be used together with SSE2, so +// we define those two sets of intrinsics at once. +// Most of the intrinsics do not conflict (the proper overloaded variant is +// resolved by the argument types, e.g. v_float32x4 ~ SSE2, v_float32x8 ~ AVX2), +// but some of AVX2 intrinsics get v256_ prefix instead of v_, e.g. v256_load() vs v_load(). +// Correspondingly, the wide intrinsics (which are mapped to the "widest" +// available instruction set) will get vx_ prefix +// (and will be mapped to v256_ counterparts) (e.g. vx_load() => v256_load()) +#if CV_AVX2 - typedef unsigned nu_type; +#define CV__SIMD_FORWARD 256 +#include "opencv2/core/hal/intrin_forward.hpp" +#include "opencv2/core/hal/intrin_avx.hpp" - static int_type reinterpret_int(value_type x) { return (int_type)x; } - static uint_type reinterpret_uint(value_type x) { return (uint_type)x; } - static value_type reinterpret_from_int(int_type x) { return (value_type)x; } -}; +#endif -template<> struct V_TypeTraits -{ - typedef int64 value_type; - typedef int64 int_type; - typedef uint64 uint_type; - typedef uint64 abs_type; - typedef int64 sum_type; +// AVX512 can be used together with SSE2 and AVX2, so +// we define those sets of intrinsics at once. +// For some of AVX512 intrinsics get v512_ prefix instead of v_, e.g. v512_load() vs v_load(). +// Wide intrinsics will be mapped to v512_ counterparts in this case(e.g. vx_load() => v512_load()) +#if CV_AVX512_SKX - typedef int nu_type; +#define CV__SIMD_FORWARD 512 +#include "opencv2/core/hal/intrin_forward.hpp" +#include "opencv2/core/hal/intrin_avx512.hpp" - static int_type reinterpret_int(value_type x) { return (int_type)x; } - static uint_type reinterpret_uint(value_type x) { return (uint_type)x; } - static value_type reinterpret_from_int(int_type x) { return (value_type)x; } -}; +#endif +//! @cond IGNORED -template<> struct V_TypeTraits -{ - typedef float value_type; - typedef int int_type; - typedef unsigned uint_type; - typedef float abs_type; - typedef float sum_type; - - typedef double w_type; - - static int_type reinterpret_int(value_type x) - { - Cv32suf u; - u.f = x; - return u.i; - } - static uint_type reinterpet_uint(value_type x) - { - Cv32suf u; - u.f = x; - return u.u; - } - static value_type reinterpret_from_int(int_type x) - { - Cv32suf u; - u.i = x; - return u.f; - } -}; +namespace cv { -template<> struct V_TypeTraits -{ - typedef double value_type; - typedef int64 int_type; - typedef uint64 uint_type; - typedef double abs_type; - typedef double sum_type; - static int_type reinterpret_int(value_type x) - { - Cv64suf u; - u.f = x; - return u.i; - } - static uint_type reinterpet_uint(value_type x) - { - Cv64suf u; - u.f = x; - return u.u; - } - static value_type reinterpret_from_int(int_type x) - { - Cv64suf u; - u.i = x; - return u.f; - } -}; +#ifndef CV_DOXYGEN +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN +#endif -template struct V_SIMD128Traits -{ - enum { nlanes = 16 / sizeof(T) }; -}; +#ifndef CV_SIMD128 +#define CV_SIMD128 0 +#endif -//! @endcond +#ifndef CV_SIMD128_CPP +#define CV_SIMD128_CPP 0 +#endif -//! @} +#ifndef CV_SIMD128_64F +#define CV_SIMD128_64F 0 +#endif -} +#ifndef CV_SIMD256 +#define CV_SIMD256 0 +#endif -#ifdef CV_DOXYGEN -# undef CV_SSE2 -# undef CV_NEON +#ifndef CV_SIMD256_64F +#define CV_SIMD256_64F 0 #endif -#if CV_SSE2 +#ifndef CV_SIMD512 +#define CV_SIMD512 0 +#endif -#include "opencv2/core/hal/intrin_sse.hpp" +#ifndef CV_SIMD512_64F +#define CV_SIMD512_64F 0 +#endif -#elif CV_NEON +#ifndef CV_SIMD128_FP16 +#define CV_SIMD128_FP16 0 +#endif -#include "opencv2/core/hal/intrin_neon.hpp" +#ifndef CV_SIMD256_FP16 +#define CV_SIMD256_FP16 0 +#endif +#ifndef CV_SIMD512_FP16 +#define CV_SIMD512_FP16 0 +#endif + +//================================================================================================== + +#define CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \ + inline vtyp vx_setall_##short_typ(typ v) { return prefix##_setall_##short_typ(v); } \ + inline vtyp vx_setzero_##short_typ() { return prefix##_setzero_##short_typ(); } \ + inline vtyp vx_##loadsfx(const typ* ptr) { return prefix##_##loadsfx(ptr); } \ + inline vtyp vx_##loadsfx##_aligned(const typ* ptr) { return prefix##_##loadsfx##_aligned(ptr); } \ + inline vtyp vx_##loadsfx##_low(const typ* ptr) { return prefix##_##loadsfx##_low(ptr); } \ + inline vtyp vx_##loadsfx##_halves(const typ* ptr0, const typ* ptr1) { return prefix##_##loadsfx##_halves(ptr0, ptr1); } \ + inline void vx_store(typ* ptr, const vtyp& v) { return v_store(ptr, v); } \ + inline void vx_store_aligned(typ* ptr, const vtyp& v) { return v_store_aligned(ptr, v); } \ + inline vtyp vx_lut(const typ* ptr, const int* idx) { return prefix##_lut(ptr, idx); } \ + inline vtyp vx_lut_pairs(const typ* ptr, const int* idx) { return prefix##_lut_pairs(ptr, idx); } + +#define CV_INTRIN_DEFINE_WIDE_LUT_QUAD(typ, vtyp, prefix) \ + inline vtyp vx_lut_quads(const typ* ptr, const int* idx) { return prefix##_lut_quads(ptr, idx); } + +#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(typ, wtyp, prefix) \ + inline wtyp vx_load_expand(const typ* ptr) { return prefix##_load_expand(ptr); } + +#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND_Q(typ, qtyp, prefix) \ + inline qtyp vx_load_expand_q(const typ* ptr) { return prefix##_load_expand_q(ptr); } + +#define CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(typ, vtyp, short_typ, wtyp, qtyp, prefix, loadsfx) \ + CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \ + CV_INTRIN_DEFINE_WIDE_LUT_QUAD(typ, vtyp, prefix) \ + CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(typ, wtyp, prefix) \ + CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND_Q(typ, qtyp, prefix) + +#define CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(prefix) \ + CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(uchar, v_uint8, u8, v_uint16, v_uint32, prefix, load) \ + CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(schar, v_int8, s8, v_int16, v_int32, prefix, load) \ + CV_INTRIN_DEFINE_WIDE_INTRIN(ushort, v_uint16, u16, prefix, load) \ + CV_INTRIN_DEFINE_WIDE_LUT_QUAD(ushort, v_uint16, prefix) \ + CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(ushort, v_uint32, prefix) \ + CV_INTRIN_DEFINE_WIDE_INTRIN(short, v_int16, s16, prefix, load) \ + CV_INTRIN_DEFINE_WIDE_LUT_QUAD(short, v_int16, prefix) \ + CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(short, v_int32, prefix) \ + CV_INTRIN_DEFINE_WIDE_INTRIN(int, v_int32, s32, prefix, load) \ + CV_INTRIN_DEFINE_WIDE_LUT_QUAD(int, v_int32, prefix) \ + CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(int, v_int64, prefix) \ + CV_INTRIN_DEFINE_WIDE_INTRIN(unsigned, v_uint32, u32, prefix, load) \ + CV_INTRIN_DEFINE_WIDE_LUT_QUAD(unsigned, v_uint32, prefix) \ + CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(unsigned, v_uint64, prefix) \ + CV_INTRIN_DEFINE_WIDE_INTRIN(float, v_float32, f32, prefix, load) \ + CV_INTRIN_DEFINE_WIDE_LUT_QUAD(float, v_float32, prefix) \ + CV_INTRIN_DEFINE_WIDE_INTRIN(int64, v_int64, s64, prefix, load) \ + CV_INTRIN_DEFINE_WIDE_INTRIN(uint64, v_uint64, u64, prefix, load) \ + CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(float16_t, v_float32, prefix) + +template struct V_RegTraits +{ +}; + +#define CV_DEF_REG_TRAITS(prefix, _reg, lane_type, suffix, _u_reg, _w_reg, _q_reg, _int_reg, _round_reg) \ + template<> struct V_RegTraits<_reg> \ + { \ + typedef _reg reg; \ + typedef _u_reg u_reg; \ + typedef _w_reg w_reg; \ + typedef _q_reg q_reg; \ + typedef _int_reg int_reg; \ + typedef _round_reg round_reg; \ + } + +#if CV_SIMD128 || CV_SIMD128_CPP + CV_DEF_REG_TRAITS(v, v_uint8x16, uchar, u8, v_uint8x16, v_uint16x8, v_uint32x4, v_int8x16, void); + CV_DEF_REG_TRAITS(v, v_int8x16, schar, s8, v_uint8x16, v_int16x8, v_int32x4, v_int8x16, void); + CV_DEF_REG_TRAITS(v, v_uint16x8, ushort, u16, v_uint16x8, v_uint32x4, v_uint64x2, v_int16x8, void); + CV_DEF_REG_TRAITS(v, v_int16x8, short, s16, v_uint16x8, v_int32x4, v_int64x2, v_int16x8, void); + CV_DEF_REG_TRAITS(v, v_uint32x4, unsigned, u32, v_uint32x4, v_uint64x2, void, v_int32x4, void); + CV_DEF_REG_TRAITS(v, v_int32x4, int, s32, v_uint32x4, v_int64x2, void, v_int32x4, void); +#if CV_SIMD128_64F || CV_SIMD128_CPP + CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, v_float64x2, void, v_int32x4, v_int32x4); #else + CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, void, void, v_int32x4, v_int32x4); +#endif + CV_DEF_REG_TRAITS(v, v_uint64x2, uint64, u64, v_uint64x2, void, void, v_int64x2, void); + CV_DEF_REG_TRAITS(v, v_int64x2, int64, s64, v_uint64x2, void, void, v_int64x2, void); +#if CV_SIMD128_64F + CV_DEF_REG_TRAITS(v, v_float64x2, double, f64, v_float64x2, void, void, v_int64x2, v_int32x4); +#endif +#endif -#include "opencv2/core/hal/intrin_cpp.hpp" +#if CV_SIMD256 + CV_DEF_REG_TRAITS(v256, v_uint8x32, uchar, u8, v_uint8x32, v_uint16x16, v_uint32x8, v_int8x32, void); + CV_DEF_REG_TRAITS(v256, v_int8x32, schar, s8, v_uint8x32, v_int16x16, v_int32x8, v_int8x32, void); + CV_DEF_REG_TRAITS(v256, v_uint16x16, ushort, u16, v_uint16x16, v_uint32x8, v_uint64x4, v_int16x16, void); + CV_DEF_REG_TRAITS(v256, v_int16x16, short, s16, v_uint16x16, v_int32x8, v_int64x4, v_int16x16, void); + CV_DEF_REG_TRAITS(v256, v_uint32x8, unsigned, u32, v_uint32x8, v_uint64x4, void, v_int32x8, void); + CV_DEF_REG_TRAITS(v256, v_int32x8, int, s32, v_uint32x8, v_int64x4, void, v_int32x8, void); + CV_DEF_REG_TRAITS(v256, v_float32x8, float, f32, v_float32x8, v_float64x4, void, v_int32x8, v_int32x8); + CV_DEF_REG_TRAITS(v256, v_uint64x4, uint64, u64, v_uint64x4, void, void, v_int64x4, void); + CV_DEF_REG_TRAITS(v256, v_int64x4, int64, s64, v_uint64x4, void, void, v_int64x4, void); + CV_DEF_REG_TRAITS(v256, v_float64x4, double, f64, v_float64x4, void, void, v_int64x4, v_int32x8); +#endif +#if CV_SIMD512 + CV_DEF_REG_TRAITS(v512, v_uint8x64, uchar, u8, v_uint8x64, v_uint16x32, v_uint32x16, v_int8x64, void); + CV_DEF_REG_TRAITS(v512, v_int8x64, schar, s8, v_uint8x64, v_int16x32, v_int32x16, v_int8x64, void); + CV_DEF_REG_TRAITS(v512, v_uint16x32, ushort, u16, v_uint16x32, v_uint32x16, v_uint64x8, v_int16x32, void); + CV_DEF_REG_TRAITS(v512, v_int16x32, short, s16, v_uint16x32, v_int32x16, v_int64x8, v_int16x32, void); + CV_DEF_REG_TRAITS(v512, v_uint32x16, unsigned, u32, v_uint32x16, v_uint64x8, void, v_int32x16, void); + CV_DEF_REG_TRAITS(v512, v_int32x16, int, s32, v_uint32x16, v_int64x8, void, v_int32x16, void); + CV_DEF_REG_TRAITS(v512, v_float32x16, float, f32, v_float32x16, v_float64x8, void, v_int32x16, v_int32x16); + CV_DEF_REG_TRAITS(v512, v_uint64x8, uint64, u64, v_uint64x8, void, void, v_int64x8, void); + CV_DEF_REG_TRAITS(v512, v_int64x8, int64, s64, v_uint64x8, void, void, v_int64x8, void); + CV_DEF_REG_TRAITS(v512, v_float64x8, double, f64, v_float64x8, void, void, v_int64x8, v_int32x16); #endif -//! @addtogroup core_hal_intrin -//! @{ +#if CV_SIMD512 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 512) +#define CV__SIMD_NAMESPACE simd512 +namespace CV__SIMD_NAMESPACE { + #define CV_SIMD 1 + #define CV_SIMD_64F CV_SIMD512_64F + #define CV_SIMD_FP16 CV_SIMD512_FP16 + #define CV_SIMD_WIDTH 64 + typedef v_uint8x64 v_uint8; + typedef v_int8x64 v_int8; + typedef v_uint16x32 v_uint16; + typedef v_int16x32 v_int16; + typedef v_uint32x16 v_uint32; + typedef v_int32x16 v_int32; + typedef v_uint64x8 v_uint64; + typedef v_int64x8 v_int64; + typedef v_float32x16 v_float32; + CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v512) +#if CV_SIMD512_64F + typedef v_float64x8 v_float64; + CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v512, load) +#endif + inline void vx_cleanup() { v512_cleanup(); } +} // namespace +using namespace CV__SIMD_NAMESPACE; +#elif CV_SIMD256 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 256) +#define CV__SIMD_NAMESPACE simd256 +namespace CV__SIMD_NAMESPACE { + #define CV_SIMD 1 + #define CV_SIMD_64F CV_SIMD256_64F + #define CV_SIMD_FP16 CV_SIMD256_FP16 + #define CV_SIMD_WIDTH 32 + typedef v_uint8x32 v_uint8; + typedef v_int8x32 v_int8; + typedef v_uint16x16 v_uint16; + typedef v_int16x16 v_int16; + typedef v_uint32x8 v_uint32; + typedef v_int32x8 v_int32; + typedef v_uint64x4 v_uint64; + typedef v_int64x4 v_int64; + typedef v_float32x8 v_float32; + CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v256) + #if CV_SIMD256_64F + typedef v_float64x4 v_float64; + CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v256, load) + #endif + inline void vx_cleanup() { v256_cleanup(); } +} // namespace +using namespace CV__SIMD_NAMESPACE; +#elif (CV_SIMD128 || CV_SIMD128_CPP) && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 128) +#if defined CV_SIMD128_CPP +#define CV__SIMD_NAMESPACE simd128_cpp +#else +#define CV__SIMD_NAMESPACE simd128 +#endif +namespace CV__SIMD_NAMESPACE { + #define CV_SIMD CV_SIMD128 + #define CV_SIMD_64F CV_SIMD128_64F + #define CV_SIMD_WIDTH 16 + typedef v_uint8x16 v_uint8; + typedef v_int8x16 v_int8; + typedef v_uint16x8 v_uint16; + typedef v_int16x8 v_int16; + typedef v_uint32x4 v_uint32; + typedef v_int32x4 v_int32; + typedef v_uint64x2 v_uint64; + typedef v_int64x2 v_int64; + typedef v_float32x4 v_float32; + CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v) + #if CV_SIMD128_64F + typedef v_float64x2 v_float64; + CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v, load) + #endif + inline void vx_cleanup() { v_cleanup(); } +} // namespace +using namespace CV__SIMD_NAMESPACE; +#endif -#ifndef CV_SIMD128 -//! Set to 1 if current compiler supports vector extensions (NEON or SSE is enabled) -#define CV_SIMD128 0 +#ifndef CV_SIMD_64F +#define CV_SIMD_64F 0 #endif -#ifndef CV_SIMD128_64F -//! Set to 1 if current intrinsics implementation supports 64-bit float vectors -#define CV_SIMD128_64F 0 +#ifndef CV_SIMD_FP16 +#define CV_SIMD_FP16 0 //!< Defined to 1 on native support of operations with float16x8_t / float16x16_t (SIMD256) types #endif -//! @} +#ifndef CV_SIMD +#define CV_SIMD 0 +#endif + +#include "simd_utils.impl.hpp" + +#ifndef CV_DOXYGEN +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END +#endif + +} // cv:: + +//! @endcond #endif diff --git a/IPL/include/opencv/opencv2/core/hal/intrin_avx.hpp b/IPL/include/opencv/opencv2/core/hal/intrin_avx.hpp new file mode 100644 index 0000000..ca315ae --- /dev/null +++ b/IPL/include/opencv/opencv2/core/hal/intrin_avx.hpp @@ -0,0 +1,3125 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#ifndef OPENCV_HAL_INTRIN_AVX_HPP +#define OPENCV_HAL_INTRIN_AVX_HPP + +#define CV_SIMD256 1 +#define CV_SIMD256_64F 1 +#define CV_SIMD256_FP16 0 // no native operations with FP16 type. Only load/store from float32x8 are available (if CV_FP16 == 1) + +namespace cv +{ + +//! @cond IGNORED + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + +///////// Utils //////////// + +inline __m256i _v256_combine(const __m128i& lo, const __m128i& hi) +{ return _mm256_inserti128_si256(_mm256_castsi128_si256(lo), hi, 1); } + +inline __m256 _v256_combine(const __m128& lo, const __m128& hi) +{ return _mm256_insertf128_ps(_mm256_castps128_ps256(lo), hi, 1); } + +inline __m256d _v256_combine(const __m128d& lo, const __m128d& hi) +{ return _mm256_insertf128_pd(_mm256_castpd128_pd256(lo), hi, 1); } + +inline int _v_cvtsi256_si32(const __m256i& a) +{ return _mm_cvtsi128_si32(_mm256_castsi256_si128(a)); } + +inline __m256i _v256_shuffle_odd_64(const __m256i& v) +{ return _mm256_permute4x64_epi64(v, _MM_SHUFFLE(3, 1, 2, 0)); } + +inline __m256d _v256_shuffle_odd_64(const __m256d& v) +{ return _mm256_permute4x64_pd(v, _MM_SHUFFLE(3, 1, 2, 0)); } + +template +inline __m256i _v256_permute2x128(const __m256i& a, const __m256i& b) +{ return _mm256_permute2x128_si256(a, b, imm); } + +template +inline __m256 _v256_permute2x128(const __m256& a, const __m256& b) +{ return _mm256_permute2f128_ps(a, b, imm); } + +template +inline __m256d _v256_permute2x128(const __m256d& a, const __m256d& b) +{ return _mm256_permute2f128_pd(a, b, imm); } + +template +inline _Tpvec v256_permute2x128(const _Tpvec& a, const _Tpvec& b) +{ return _Tpvec(_v256_permute2x128(a.val, b.val)); } + +template +inline __m256i _v256_permute4x64(const __m256i& a) +{ return _mm256_permute4x64_epi64(a, imm); } + +template +inline __m256d _v256_permute4x64(const __m256d& a) +{ return _mm256_permute4x64_pd(a, imm); } + +template +inline _Tpvec v256_permute4x64(const _Tpvec& a) +{ return _Tpvec(_v256_permute4x64(a.val)); } + +inline __m128i _v256_extract_high(const __m256i& v) +{ return _mm256_extracti128_si256(v, 1); } + +inline __m128 _v256_extract_high(const __m256& v) +{ return _mm256_extractf128_ps(v, 1); } + +inline __m128d _v256_extract_high(const __m256d& v) +{ return _mm256_extractf128_pd(v, 1); } + +inline __m128i _v256_extract_low(const __m256i& v) +{ return _mm256_castsi256_si128(v); } + +inline __m128 _v256_extract_low(const __m256& v) +{ return _mm256_castps256_ps128(v); } + +inline __m128d _v256_extract_low(const __m256d& v) +{ return _mm256_castpd256_pd128(v); } + +inline __m256i _v256_packs_epu32(const __m256i& a, const __m256i& b) +{ + const __m256i m = _mm256_set1_epi32(65535); + __m256i am = _mm256_min_epu32(a, m); + __m256i bm = _mm256_min_epu32(b, m); + return _mm256_packus_epi32(am, bm); +} + +template +inline int _v256_extract_epi8(const __m256i& a) +{ +#if defined(CV__SIMD_HAVE_mm256_extract_epi8) || (CV_AVX2 && (!defined(_MSC_VER) || _MSC_VER >= 1910/*MSVS 2017*/)) + return _mm256_extract_epi8(a, i); +#else + __m128i b = _mm256_extractf128_si256(a, ((i) >> 4)); + return _mm_extract_epi8(b, i & 15); // SSE4.1 +#endif +} + +template +inline int _v256_extract_epi16(const __m256i& a) +{ +#if defined(CV__SIMD_HAVE_mm256_extract_epi8) || (CV_AVX2 && (!defined(_MSC_VER) || _MSC_VER >= 1910/*MSVS 2017*/)) + return _mm256_extract_epi16(a, i); +#else + __m128i b = _mm256_extractf128_si256(a, ((i) >> 3)); + return _mm_extract_epi16(b, i & 7); // SSE2 +#endif +} + +template +inline int _v256_extract_epi32(const __m256i& a) +{ +#if defined(CV__SIMD_HAVE_mm256_extract_epi8) || (CV_AVX2 && (!defined(_MSC_VER) || _MSC_VER >= 1910/*MSVS 2017*/)) + return _mm256_extract_epi32(a, i); +#else + __m128i b = _mm256_extractf128_si256(a, ((i) >> 2)); + return _mm_extract_epi32(b, i & 3); // SSE4.1 +#endif +} + +template +inline int64 _v256_extract_epi64(const __m256i& a) +{ +#if defined(CV__SIMD_HAVE_mm256_extract_epi8) || (CV_AVX2 && (!defined(_MSC_VER) || _MSC_VER >= 1910/*MSVS 2017*/)) + return _mm256_extract_epi64(a, i); +#else + __m128i b = _mm256_extractf128_si256(a, ((i) >> 1)); + return _mm_extract_epi64(b, i & 1); // SSE4.1 +#endif +} + +///////// Types //////////// + +struct v_uint8x32 +{ + typedef uchar lane_type; + enum { nlanes = 32 }; + __m256i val; + + explicit v_uint8x32(__m256i v) : val(v) {} + v_uint8x32(uchar v0, uchar v1, uchar v2, uchar v3, + uchar v4, uchar v5, uchar v6, uchar v7, + uchar v8, uchar v9, uchar v10, uchar v11, + uchar v12, uchar v13, uchar v14, uchar v15, + uchar v16, uchar v17, uchar v18, uchar v19, + uchar v20, uchar v21, uchar v22, uchar v23, + uchar v24, uchar v25, uchar v26, uchar v27, + uchar v28, uchar v29, uchar v30, uchar v31) + { + val = _mm256_setr_epi8((char)v0, (char)v1, (char)v2, (char)v3, + (char)v4, (char)v5, (char)v6 , (char)v7, (char)v8, (char)v9, + (char)v10, (char)v11, (char)v12, (char)v13, (char)v14, (char)v15, + (char)v16, (char)v17, (char)v18, (char)v19, (char)v20, (char)v21, + (char)v22, (char)v23, (char)v24, (char)v25, (char)v26, (char)v27, + (char)v28, (char)v29, (char)v30, (char)v31); + } + v_uint8x32() : val(_mm256_setzero_si256()) {} + uchar get0() const { return (uchar)_v_cvtsi256_si32(val); } +}; + +struct v_int8x32 +{ + typedef schar lane_type; + enum { nlanes = 32 }; + __m256i val; + + explicit v_int8x32(__m256i v) : val(v) {} + v_int8x32(schar v0, schar v1, schar v2, schar v3, + schar v4, schar v5, schar v6, schar v7, + schar v8, schar v9, schar v10, schar v11, + schar v12, schar v13, schar v14, schar v15, + schar v16, schar v17, schar v18, schar v19, + schar v20, schar v21, schar v22, schar v23, + schar v24, schar v25, schar v26, schar v27, + schar v28, schar v29, schar v30, schar v31) + { + val = _mm256_setr_epi8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, + v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, + v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31); + } + v_int8x32() : val(_mm256_setzero_si256()) {} + schar get0() const { return (schar)_v_cvtsi256_si32(val); } +}; + +struct v_uint16x16 +{ + typedef ushort lane_type; + enum { nlanes = 16 }; + __m256i val; + + explicit v_uint16x16(__m256i v) : val(v) {} + v_uint16x16(ushort v0, ushort v1, ushort v2, ushort v3, + ushort v4, ushort v5, ushort v6, ushort v7, + ushort v8, ushort v9, ushort v10, ushort v11, + ushort v12, ushort v13, ushort v14, ushort v15) + { + val = _mm256_setr_epi16((short)v0, (short)v1, (short)v2, (short)v3, + (short)v4, (short)v5, (short)v6, (short)v7, (short)v8, (short)v9, + (short)v10, (short)v11, (short)v12, (short)v13, (short)v14, (short)v15); + } + v_uint16x16() : val(_mm256_setzero_si256()) {} + ushort get0() const { return (ushort)_v_cvtsi256_si32(val); } +}; + +struct v_int16x16 +{ + typedef short lane_type; + enum { nlanes = 16 }; + __m256i val; + + explicit v_int16x16(__m256i v) : val(v) {} + v_int16x16(short v0, short v1, short v2, short v3, + short v4, short v5, short v6, short v7, + short v8, short v9, short v10, short v11, + short v12, short v13, short v14, short v15) + { + val = _mm256_setr_epi16(v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15); + } + v_int16x16() : val(_mm256_setzero_si256()) {} + short get0() const { return (short)_v_cvtsi256_si32(val); } +}; + +struct v_uint32x8 +{ + typedef unsigned lane_type; + enum { nlanes = 8 }; + __m256i val; + + explicit v_uint32x8(__m256i v) : val(v) {} + v_uint32x8(unsigned v0, unsigned v1, unsigned v2, unsigned v3, + unsigned v4, unsigned v5, unsigned v6, unsigned v7) + { + val = _mm256_setr_epi32((unsigned)v0, (unsigned)v1, (unsigned)v2, + (unsigned)v3, (unsigned)v4, (unsigned)v5, (unsigned)v6, (unsigned)v7); + } + v_uint32x8() : val(_mm256_setzero_si256()) {} + unsigned get0() const { return (unsigned)_v_cvtsi256_si32(val); } +}; + +struct v_int32x8 +{ + typedef int lane_type; + enum { nlanes = 8 }; + __m256i val; + + explicit v_int32x8(__m256i v) : val(v) {} + v_int32x8(int v0, int v1, int v2, int v3, + int v4, int v5, int v6, int v7) + { + val = _mm256_setr_epi32(v0, v1, v2, v3, v4, v5, v6, v7); + } + v_int32x8() : val(_mm256_setzero_si256()) {} + int get0() const { return _v_cvtsi256_si32(val); } +}; + +struct v_float32x8 +{ + typedef float lane_type; + enum { nlanes = 8 }; + __m256 val; + + explicit v_float32x8(__m256 v) : val(v) {} + v_float32x8(float v0, float v1, float v2, float v3, + float v4, float v5, float v6, float v7) + { + val = _mm256_setr_ps(v0, v1, v2, v3, v4, v5, v6, v7); + } + v_float32x8() : val(_mm256_setzero_ps()) {} + float get0() const { return _mm_cvtss_f32(_mm256_castps256_ps128(val)); } +}; + +struct v_uint64x4 +{ + typedef uint64 lane_type; + enum { nlanes = 4 }; + __m256i val; + + explicit v_uint64x4(__m256i v) : val(v) {} + v_uint64x4(uint64 v0, uint64 v1, uint64 v2, uint64 v3) + { val = _mm256_setr_epi64x((int64)v0, (int64)v1, (int64)v2, (int64)v3); } + v_uint64x4() : val(_mm256_setzero_si256()) {} + uint64 get0() const + { + #if defined __x86_64__ || defined _M_X64 + return (uint64)_mm_cvtsi128_si64(_mm256_castsi256_si128(val)); + #else + int a = _mm_cvtsi128_si32(_mm256_castsi256_si128(val)); + int b = _mm_cvtsi128_si32(_mm256_castsi256_si128(_mm256_srli_epi64(val, 32))); + return (unsigned)a | ((uint64)(unsigned)b << 32); + #endif + } +}; + +struct v_int64x4 +{ + typedef int64 lane_type; + enum { nlanes = 4 }; + __m256i val; + + explicit v_int64x4(__m256i v) : val(v) {} + v_int64x4(int64 v0, int64 v1, int64 v2, int64 v3) + { val = _mm256_setr_epi64x(v0, v1, v2, v3); } + v_int64x4() : val(_mm256_setzero_si256()) {} + + int64 get0() const + { + #if defined __x86_64__ || defined _M_X64 + return (int64)_mm_cvtsi128_si64(_mm256_castsi256_si128(val)); + #else + int a = _mm_cvtsi128_si32(_mm256_castsi256_si128(val)); + int b = _mm_cvtsi128_si32(_mm256_castsi256_si128(_mm256_srli_epi64(val, 32))); + return (int64)((unsigned)a | ((uint64)(unsigned)b << 32)); + #endif + } +}; + +struct v_float64x4 +{ + typedef double lane_type; + enum { nlanes = 4 }; + __m256d val; + + explicit v_float64x4(__m256d v) : val(v) {} + v_float64x4(double v0, double v1, double v2, double v3) + { val = _mm256_setr_pd(v0, v1, v2, v3); } + v_float64x4() : val(_mm256_setzero_pd()) {} + double get0() const { return _mm_cvtsd_f64(_mm256_castpd256_pd128(val)); } +}; + +//////////////// Load and store operations /////////////// + +#define OPENCV_HAL_IMPL_AVX_LOADSTORE(_Tpvec, _Tp) \ + inline _Tpvec v256_load(const _Tp* ptr) \ + { return _Tpvec(_mm256_loadu_si256((const __m256i*)ptr)); } \ + inline _Tpvec v256_load_aligned(const _Tp* ptr) \ + { return _Tpvec(_mm256_load_si256((const __m256i*)ptr)); } \ + inline _Tpvec v256_load_low(const _Tp* ptr) \ + { \ + __m128i v128 = _mm_loadu_si128((const __m128i*)ptr); \ + return _Tpvec(_mm256_castsi128_si256(v128)); \ + } \ + inline _Tpvec v256_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ + { \ + __m128i vlo = _mm_loadu_si128((const __m128i*)ptr0); \ + __m128i vhi = _mm_loadu_si128((const __m128i*)ptr1); \ + return _Tpvec(_v256_combine(vlo, vhi)); \ + } \ + inline void v_store(_Tp* ptr, const _Tpvec& a) \ + { _mm256_storeu_si256((__m256i*)ptr, a.val); } \ + inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ + { _mm256_store_si256((__m256i*)ptr, a.val); } \ + inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ + { _mm256_stream_si256((__m256i*)ptr, a.val); } \ + inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \ + { \ + if( mode == hal::STORE_UNALIGNED ) \ + _mm256_storeu_si256((__m256i*)ptr, a.val); \ + else if( mode == hal::STORE_ALIGNED_NOCACHE ) \ + _mm256_stream_si256((__m256i*)ptr, a.val); \ + else \ + _mm256_store_si256((__m256i*)ptr, a.val); \ + } \ + inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ + { _mm_storeu_si128((__m128i*)ptr, _v256_extract_low(a.val)); } \ + inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ + { _mm_storeu_si128((__m128i*)ptr, _v256_extract_high(a.val)); } + +OPENCV_HAL_IMPL_AVX_LOADSTORE(v_uint8x32, uchar) +OPENCV_HAL_IMPL_AVX_LOADSTORE(v_int8x32, schar) +OPENCV_HAL_IMPL_AVX_LOADSTORE(v_uint16x16, ushort) +OPENCV_HAL_IMPL_AVX_LOADSTORE(v_int16x16, short) +OPENCV_HAL_IMPL_AVX_LOADSTORE(v_uint32x8, unsigned) +OPENCV_HAL_IMPL_AVX_LOADSTORE(v_int32x8, int) +OPENCV_HAL_IMPL_AVX_LOADSTORE(v_uint64x4, uint64) +OPENCV_HAL_IMPL_AVX_LOADSTORE(v_int64x4, int64) + +#define OPENCV_HAL_IMPL_AVX_LOADSTORE_FLT(_Tpvec, _Tp, suffix, halfreg) \ + inline _Tpvec v256_load(const _Tp* ptr) \ + { return _Tpvec(_mm256_loadu_##suffix(ptr)); } \ + inline _Tpvec v256_load_aligned(const _Tp* ptr) \ + { return _Tpvec(_mm256_load_##suffix(ptr)); } \ + inline _Tpvec v256_load_low(const _Tp* ptr) \ + { \ + return _Tpvec(_mm256_cast##suffix##128_##suffix##256 \ + (_mm_loadu_##suffix(ptr))); \ + } \ + inline _Tpvec v256_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ + { \ + halfreg vlo = _mm_loadu_##suffix(ptr0); \ + halfreg vhi = _mm_loadu_##suffix(ptr1); \ + return _Tpvec(_v256_combine(vlo, vhi)); \ + } \ + inline void v_store(_Tp* ptr, const _Tpvec& a) \ + { _mm256_storeu_##suffix(ptr, a.val); } \ + inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ + { _mm256_store_##suffix(ptr, a.val); } \ + inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ + { _mm256_stream_##suffix(ptr, a.val); } \ + inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \ + { \ + if( mode == hal::STORE_UNALIGNED ) \ + _mm256_storeu_##suffix(ptr, a.val); \ + else if( mode == hal::STORE_ALIGNED_NOCACHE ) \ + _mm256_stream_##suffix(ptr, a.val); \ + else \ + _mm256_store_##suffix(ptr, a.val); \ + } \ + inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ + { _mm_storeu_##suffix(ptr, _v256_extract_low(a.val)); } \ + inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ + { _mm_storeu_##suffix(ptr, _v256_extract_high(a.val)); } + +OPENCV_HAL_IMPL_AVX_LOADSTORE_FLT(v_float32x8, float, ps, __m128) +OPENCV_HAL_IMPL_AVX_LOADSTORE_FLT(v_float64x4, double, pd, __m128d) + +#define OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, _Tpvecf, suffix, cast) \ + inline _Tpvec v_reinterpret_as_##suffix(const _Tpvecf& a) \ + { return _Tpvec(cast(a.val)); } + +#define OPENCV_HAL_IMPL_AVX_INIT(_Tpvec, _Tp, suffix, ssuffix, ctype_s) \ + inline _Tpvec v256_setzero_##suffix() \ + { return _Tpvec(_mm256_setzero_si256()); } \ + inline _Tpvec v256_setall_##suffix(_Tp v) \ + { return _Tpvec(_mm256_set1_##ssuffix((ctype_s)v)); } \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint8x32, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int8x32, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint16x16, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int16x16, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint32x8, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int32x8, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint64x4, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int64x4, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_float32x8, suffix, _mm256_castps_si256) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_float64x4, suffix, _mm256_castpd_si256) + +OPENCV_HAL_IMPL_AVX_INIT(v_uint8x32, uchar, u8, epi8, char) +OPENCV_HAL_IMPL_AVX_INIT(v_int8x32, schar, s8, epi8, char) +OPENCV_HAL_IMPL_AVX_INIT(v_uint16x16, ushort, u16, epi16, short) +OPENCV_HAL_IMPL_AVX_INIT(v_int16x16, short, s16, epi16, short) +OPENCV_HAL_IMPL_AVX_INIT(v_uint32x8, unsigned, u32, epi32, int) +OPENCV_HAL_IMPL_AVX_INIT(v_int32x8, int, s32, epi32, int) +OPENCV_HAL_IMPL_AVX_INIT(v_uint64x4, uint64, u64, epi64x, int64) +OPENCV_HAL_IMPL_AVX_INIT(v_int64x4, int64, s64, epi64x, int64) + +#define OPENCV_HAL_IMPL_AVX_INIT_FLT(_Tpvec, _Tp, suffix, zsuffix, cast) \ + inline _Tpvec v256_setzero_##suffix() \ + { return _Tpvec(_mm256_setzero_##zsuffix()); } \ + inline _Tpvec v256_setall_##suffix(_Tp v) \ + { return _Tpvec(_mm256_set1_##zsuffix(v)); } \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint8x32, suffix, cast) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int8x32, suffix, cast) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint16x16, suffix, cast) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int16x16, suffix, cast) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint32x8, suffix, cast) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int32x8, suffix, cast) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint64x4, suffix, cast) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int64x4, suffix, cast) + +OPENCV_HAL_IMPL_AVX_INIT_FLT(v_float32x8, float, f32, ps, _mm256_castsi256_ps) +OPENCV_HAL_IMPL_AVX_INIT_FLT(v_float64x4, double, f64, pd, _mm256_castsi256_pd) + +inline v_float32x8 v_reinterpret_as_f32(const v_float32x8& a) +{ return a; } +inline v_float32x8 v_reinterpret_as_f32(const v_float64x4& a) +{ return v_float32x8(_mm256_castpd_ps(a.val)); } + +inline v_float64x4 v_reinterpret_as_f64(const v_float64x4& a) +{ return a; } +inline v_float64x4 v_reinterpret_as_f64(const v_float32x8& a) +{ return v_float64x4(_mm256_castps_pd(a.val)); } + +/* Recombine */ +/*#define OPENCV_HAL_IMPL_AVX_COMBINE(_Tpvec, perm) \ + inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(perm(a.val, b.val, 0x20)); } \ + inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(perm(a.val, b.val, 0x31)); } \ + inline void v_recombine(const _Tpvec& a, const _Tpvec& b, \ + _Tpvec& c, _Tpvec& d) \ + { c = v_combine_low(a, b); d = v_combine_high(a, b); } + +#define OPENCV_HAL_IMPL_AVX_UNPACKS(_Tpvec, suffix) \ + OPENCV_HAL_IMPL_AVX_COMBINE(_Tpvec, _mm256_permute2x128_si256) \ + inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, \ + _Tpvec& b0, _Tpvec& b1) \ + { \ + __m256i v0 = _v256_shuffle_odd_64(a0.val); \ + __m256i v1 = _v256_shuffle_odd_64(a1.val); \ + b0.val = _mm256_unpacklo_##suffix(v0, v1); \ + b1.val = _mm256_unpackhi_##suffix(v0, v1); \ + } + +OPENCV_HAL_IMPL_AVX_UNPACKS(v_uint8x32, epi8) +OPENCV_HAL_IMPL_AVX_UNPACKS(v_int8x32, epi8) +OPENCV_HAL_IMPL_AVX_UNPACKS(v_uint16x16, epi16) +OPENCV_HAL_IMPL_AVX_UNPACKS(v_int16x16, epi16) +OPENCV_HAL_IMPL_AVX_UNPACKS(v_uint32x8, epi32) +OPENCV_HAL_IMPL_AVX_UNPACKS(v_int32x8, epi32) +OPENCV_HAL_IMPL_AVX_UNPACKS(v_uint64x4, epi64) +OPENCV_HAL_IMPL_AVX_UNPACKS(v_int64x4, epi64) +OPENCV_HAL_IMPL_AVX_COMBINE(v_float32x8, _mm256_permute2f128_ps) +OPENCV_HAL_IMPL_AVX_COMBINE(v_float64x4, _mm256_permute2f128_pd) + +inline void v_zip(const v_float32x8& a0, const v_float32x8& a1, v_float32x8& b0, v_float32x8& b1) +{ + __m256 v0 = _mm256_unpacklo_ps(a0.val, a1.val); + __m256 v1 = _mm256_unpackhi_ps(a0.val, a1.val); + v_recombine(v_float32x8(v0), v_float32x8(v1), b0, b1); +} + +inline void v_zip(const v_float64x4& a0, const v_float64x4& a1, v_float64x4& b0, v_float64x4& b1) +{ + __m256d v0 = _v_shuffle_odd_64(a0.val); + __m256d v1 = _v_shuffle_odd_64(a1.val); + b0.val = _mm256_unpacklo_pd(v0, v1); + b1.val = _mm256_unpackhi_pd(v0, v1); +}*/ + +//////////////// Variant Value reordering /////////////// + +// unpacks +#define OPENCV_HAL_IMPL_AVX_UNPACK(_Tpvec, suffix) \ + inline _Tpvec v256_unpacklo(const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_mm256_unpacklo_##suffix(a.val, b.val)); } \ + inline _Tpvec v256_unpackhi(const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_mm256_unpackhi_##suffix(a.val, b.val)); } + +OPENCV_HAL_IMPL_AVX_UNPACK(v_uint8x32, epi8) +OPENCV_HAL_IMPL_AVX_UNPACK(v_int8x32, epi8) +OPENCV_HAL_IMPL_AVX_UNPACK(v_uint16x16, epi16) +OPENCV_HAL_IMPL_AVX_UNPACK(v_int16x16, epi16) +OPENCV_HAL_IMPL_AVX_UNPACK(v_uint32x8, epi32) +OPENCV_HAL_IMPL_AVX_UNPACK(v_int32x8, epi32) +OPENCV_HAL_IMPL_AVX_UNPACK(v_uint64x4, epi64) +OPENCV_HAL_IMPL_AVX_UNPACK(v_int64x4, epi64) +OPENCV_HAL_IMPL_AVX_UNPACK(v_float32x8, ps) +OPENCV_HAL_IMPL_AVX_UNPACK(v_float64x4, pd) + +// blend +#define OPENCV_HAL_IMPL_AVX_BLEND(_Tpvec, suffix) \ + template \ + inline _Tpvec v256_blend(const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_mm256_blend_##suffix(a.val, b.val, m)); } + +OPENCV_HAL_IMPL_AVX_BLEND(v_uint16x16, epi16) +OPENCV_HAL_IMPL_AVX_BLEND(v_int16x16, epi16) +OPENCV_HAL_IMPL_AVX_BLEND(v_uint32x8, epi32) +OPENCV_HAL_IMPL_AVX_BLEND(v_int32x8, epi32) +OPENCV_HAL_IMPL_AVX_BLEND(v_float32x8, ps) +OPENCV_HAL_IMPL_AVX_BLEND(v_float64x4, pd) + +template +inline v_uint64x4 v256_blend(const v_uint64x4& a, const v_uint64x4& b) +{ + enum {M0 = m}; + enum {M1 = (M0 | (M0 << 2)) & 0x33}; + enum {M2 = (M1 | (M1 << 1)) & 0x55}; + enum {MM = M2 | (M2 << 1)}; + return v_uint64x4(_mm256_blend_epi32(a.val, b.val, MM)); +} +template +inline v_int64x4 v256_blend(const v_int64x4& a, const v_int64x4& b) +{ return v_int64x4(v256_blend(v_uint64x4(a.val), v_uint64x4(b.val)).val); } + +// shuffle +// todo: emulate 64bit +#define OPENCV_HAL_IMPL_AVX_SHUFFLE(_Tpvec, intrin) \ + template \ + inline _Tpvec v256_shuffle(const _Tpvec& a) \ + { return _Tpvec(_mm256_##intrin(a.val, m)); } + +OPENCV_HAL_IMPL_AVX_SHUFFLE(v_uint32x8, shuffle_epi32) +OPENCV_HAL_IMPL_AVX_SHUFFLE(v_int32x8, shuffle_epi32) +OPENCV_HAL_IMPL_AVX_SHUFFLE(v_float32x8, permute_ps) +OPENCV_HAL_IMPL_AVX_SHUFFLE(v_float64x4, permute_pd) + +template +inline void v256_zip(const _Tpvec& a, const _Tpvec& b, _Tpvec& ab0, _Tpvec& ab1) +{ + ab0 = v256_unpacklo(a, b); + ab1 = v256_unpackhi(a, b); +} + +template +inline _Tpvec v256_combine_diagonal(const _Tpvec& a, const _Tpvec& b) +{ return _Tpvec(_mm256_blend_epi32(a.val, b.val, 0xf0)); } + +inline v_float32x8 v256_combine_diagonal(const v_float32x8& a, const v_float32x8& b) +{ return v256_blend<0xf0>(a, b); } + +inline v_float64x4 v256_combine_diagonal(const v_float64x4& a, const v_float64x4& b) +{ return v256_blend<0xc>(a, b); } + +template +inline _Tpvec v256_alignr_128(const _Tpvec& a, const _Tpvec& b) +{ return v256_permute2x128<0x21>(a, b); } + +template +inline _Tpvec v256_alignr_64(const _Tpvec& a, const _Tpvec& b) +{ return _Tpvec(_mm256_alignr_epi8(a.val, b.val, 8)); } +inline v_float64x4 v256_alignr_64(const v_float64x4& a, const v_float64x4& b) +{ return v_float64x4(_mm256_shuffle_pd(b.val, a.val, _MM_SHUFFLE(0, 0, 1, 1))); } +// todo: emulate float32 + +template +inline _Tpvec v256_swap_halves(const _Tpvec& a) +{ return v256_permute2x128<1>(a, a); } + +template +inline _Tpvec v256_reverse_64(const _Tpvec& a) +{ return v256_permute4x64<_MM_SHUFFLE(0, 1, 2, 3)>(a); } + +// ZIP +#define OPENCV_HAL_IMPL_AVX_ZIP(_Tpvec) \ + inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \ + { return v256_permute2x128<0x20>(a, b); } \ + inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) \ + { return v256_permute2x128<0x31>(a, b); } \ + inline void v_recombine(const _Tpvec& a, const _Tpvec& b, \ + _Tpvec& c, _Tpvec& d) \ + { \ + _Tpvec a1b0 = v256_alignr_128(a, b); \ + c = v256_combine_diagonal(a, a1b0); \ + d = v256_combine_diagonal(a1b0, b); \ + } \ + inline void v_zip(const _Tpvec& a, const _Tpvec& b, \ + _Tpvec& ab0, _Tpvec& ab1) \ + { \ + _Tpvec ab0ab2, ab1ab3; \ + v256_zip(a, b, ab0ab2, ab1ab3); \ + v_recombine(ab0ab2, ab1ab3, ab0, ab1); \ + } + +OPENCV_HAL_IMPL_AVX_ZIP(v_uint8x32) +OPENCV_HAL_IMPL_AVX_ZIP(v_int8x32) +OPENCV_HAL_IMPL_AVX_ZIP(v_uint16x16) +OPENCV_HAL_IMPL_AVX_ZIP(v_int16x16) +OPENCV_HAL_IMPL_AVX_ZIP(v_uint32x8) +OPENCV_HAL_IMPL_AVX_ZIP(v_int32x8) +OPENCV_HAL_IMPL_AVX_ZIP(v_uint64x4) +OPENCV_HAL_IMPL_AVX_ZIP(v_int64x4) +OPENCV_HAL_IMPL_AVX_ZIP(v_float32x8) +OPENCV_HAL_IMPL_AVX_ZIP(v_float64x4) + +////////// Arithmetic, bitwise and comparison operations ///////// + +/* Element-wise binary and unary operations */ + +/** Arithmetics **/ +#define OPENCV_HAL_IMPL_AVX_BIN_OP(bin_op, _Tpvec, intrin) \ + inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(intrin(a.val, b.val)); } \ + inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \ + { a.val = intrin(a.val, b.val); return a; } + +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_uint8x32, _mm256_adds_epu8) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_uint8x32, _mm256_subs_epu8) +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_int8x32, _mm256_adds_epi8) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_int8x32, _mm256_subs_epi8) +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_uint16x16, _mm256_adds_epu16) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_uint16x16, _mm256_subs_epu16) +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_int16x16, _mm256_adds_epi16) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_int16x16, _mm256_subs_epi16) +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_uint32x8, _mm256_add_epi32) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_uint32x8, _mm256_sub_epi32) +OPENCV_HAL_IMPL_AVX_BIN_OP(*, v_uint32x8, _mm256_mullo_epi32) +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_int32x8, _mm256_add_epi32) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_int32x8, _mm256_sub_epi32) +OPENCV_HAL_IMPL_AVX_BIN_OP(*, v_int32x8, _mm256_mullo_epi32) +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_uint64x4, _mm256_add_epi64) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_uint64x4, _mm256_sub_epi64) +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_int64x4, _mm256_add_epi64) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_int64x4, _mm256_sub_epi64) + +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_float32x8, _mm256_add_ps) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_float32x8, _mm256_sub_ps) +OPENCV_HAL_IMPL_AVX_BIN_OP(*, v_float32x8, _mm256_mul_ps) +OPENCV_HAL_IMPL_AVX_BIN_OP(/, v_float32x8, _mm256_div_ps) +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_float64x4, _mm256_add_pd) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_float64x4, _mm256_sub_pd) +OPENCV_HAL_IMPL_AVX_BIN_OP(*, v_float64x4, _mm256_mul_pd) +OPENCV_HAL_IMPL_AVX_BIN_OP(/, v_float64x4, _mm256_div_pd) + +// saturating multiply 8-bit, 16-bit +inline v_uint8x32 operator * (const v_uint8x32& a, const v_uint8x32& b) +{ + v_uint16x16 c, d; + v_mul_expand(a, b, c, d); + return v_pack(c, d); +} +inline v_int8x32 operator * (const v_int8x32& a, const v_int8x32& b) +{ + v_int16x16 c, d; + v_mul_expand(a, b, c, d); + return v_pack(c, d); +} +inline v_uint16x16 operator * (const v_uint16x16& a, const v_uint16x16& b) +{ + __m256i pl = _mm256_mullo_epi16(a.val, b.val); + __m256i ph = _mm256_mulhi_epu16(a.val, b.val); + __m256i p0 = _mm256_unpacklo_epi16(pl, ph); + __m256i p1 = _mm256_unpackhi_epi16(pl, ph); + return v_uint16x16(_v256_packs_epu32(p0, p1)); +} +inline v_int16x16 operator * (const v_int16x16& a, const v_int16x16& b) +{ + __m256i pl = _mm256_mullo_epi16(a.val, b.val); + __m256i ph = _mm256_mulhi_epi16(a.val, b.val); + __m256i p0 = _mm256_unpacklo_epi16(pl, ph); + __m256i p1 = _mm256_unpackhi_epi16(pl, ph); + return v_int16x16(_mm256_packs_epi32(p0, p1)); +} +inline v_uint8x32& operator *= (v_uint8x32& a, const v_uint8x32& b) +{ a = a * b; return a; } +inline v_int8x32& operator *= (v_int8x32& a, const v_int8x32& b) +{ a = a * b; return a; } +inline v_uint16x16& operator *= (v_uint16x16& a, const v_uint16x16& b) +{ a = a * b; return a; } +inline v_int16x16& operator *= (v_int16x16& a, const v_int16x16& b) +{ a = a * b; return a; } + +/** Non-saturating arithmetics **/ +#define OPENCV_HAL_IMPL_AVX_BIN_FUNC(func, _Tpvec, intrin) \ + inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(intrin(a.val, b.val)); } + +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_add_wrap, v_uint8x32, _mm256_add_epi8) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_add_wrap, v_int8x32, _mm256_add_epi8) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_add_wrap, v_uint16x16, _mm256_add_epi16) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_add_wrap, v_int16x16, _mm256_add_epi16) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_sub_wrap, v_uint8x32, _mm256_sub_epi8) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_sub_wrap, v_int8x32, _mm256_sub_epi8) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_sub_wrap, v_uint16x16, _mm256_sub_epi16) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_sub_wrap, v_int16x16, _mm256_sub_epi16) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_mul_wrap, v_uint16x16, _mm256_mullo_epi16) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_mul_wrap, v_int16x16, _mm256_mullo_epi16) + +inline v_uint8x32 v_mul_wrap(const v_uint8x32& a, const v_uint8x32& b) +{ + __m256i ad = _mm256_srai_epi16(a.val, 8); + __m256i bd = _mm256_srai_epi16(b.val, 8); + __m256i p0 = _mm256_mullo_epi16(a.val, b.val); // even + __m256i p1 = _mm256_slli_epi16(_mm256_mullo_epi16(ad, bd), 8); // odd + + const __m256i b01 = _mm256_set1_epi32(0xFF00FF00); + return v_uint8x32(_mm256_blendv_epi8(p0, p1, b01)); +} +inline v_int8x32 v_mul_wrap(const v_int8x32& a, const v_int8x32& b) +{ + return v_reinterpret_as_s8(v_mul_wrap(v_reinterpret_as_u8(a), v_reinterpret_as_u8(b))); +} + +// Multiply and expand +inline void v_mul_expand(const v_uint8x32& a, const v_uint8x32& b, + v_uint16x16& c, v_uint16x16& d) +{ + v_uint16x16 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c = v_mul_wrap(a0, b0); + d = v_mul_wrap(a1, b1); +} + +inline void v_mul_expand(const v_int8x32& a, const v_int8x32& b, + v_int16x16& c, v_int16x16& d) +{ + v_int16x16 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c = v_mul_wrap(a0, b0); + d = v_mul_wrap(a1, b1); +} + +inline void v_mul_expand(const v_int16x16& a, const v_int16x16& b, + v_int32x8& c, v_int32x8& d) +{ + v_int16x16 vhi = v_int16x16(_mm256_mulhi_epi16(a.val, b.val)); + + v_int16x16 v0, v1; + v_zip(v_mul_wrap(a, b), vhi, v0, v1); + + c = v_reinterpret_as_s32(v0); + d = v_reinterpret_as_s32(v1); +} + +inline void v_mul_expand(const v_uint16x16& a, const v_uint16x16& b, + v_uint32x8& c, v_uint32x8& d) +{ + v_uint16x16 vhi = v_uint16x16(_mm256_mulhi_epu16(a.val, b.val)); + + v_uint16x16 v0, v1; + v_zip(v_mul_wrap(a, b), vhi, v0, v1); + + c = v_reinterpret_as_u32(v0); + d = v_reinterpret_as_u32(v1); +} + +inline void v_mul_expand(const v_uint32x8& a, const v_uint32x8& b, + v_uint64x4& c, v_uint64x4& d) +{ + __m256i v0 = _mm256_mul_epu32(a.val, b.val); + __m256i v1 = _mm256_mul_epu32(_mm256_srli_epi64(a.val, 32), _mm256_srli_epi64(b.val, 32)); + v_zip(v_uint64x4(v0), v_uint64x4(v1), c, d); +} + +inline v_int16x16 v_mul_hi(const v_int16x16& a, const v_int16x16& b) { return v_int16x16(_mm256_mulhi_epi16(a.val, b.val)); } +inline v_uint16x16 v_mul_hi(const v_uint16x16& a, const v_uint16x16& b) { return v_uint16x16(_mm256_mulhi_epu16(a.val, b.val)); } + +/** Bitwise shifts **/ +#define OPENCV_HAL_IMPL_AVX_SHIFT_OP(_Tpuvec, _Tpsvec, suffix, srai) \ + inline _Tpuvec operator << (const _Tpuvec& a, int imm) \ + { return _Tpuvec(_mm256_slli_##suffix(a.val, imm)); } \ + inline _Tpsvec operator << (const _Tpsvec& a, int imm) \ + { return _Tpsvec(_mm256_slli_##suffix(a.val, imm)); } \ + inline _Tpuvec operator >> (const _Tpuvec& a, int imm) \ + { return _Tpuvec(_mm256_srli_##suffix(a.val, imm)); } \ + inline _Tpsvec operator >> (const _Tpsvec& a, int imm) \ + { return _Tpsvec(srai(a.val, imm)); } \ + template \ + inline _Tpuvec v_shl(const _Tpuvec& a) \ + { return _Tpuvec(_mm256_slli_##suffix(a.val, imm)); } \ + template \ + inline _Tpsvec v_shl(const _Tpsvec& a) \ + { return _Tpsvec(_mm256_slli_##suffix(a.val, imm)); } \ + template \ + inline _Tpuvec v_shr(const _Tpuvec& a) \ + { return _Tpuvec(_mm256_srli_##suffix(a.val, imm)); } \ + template \ + inline _Tpsvec v_shr(const _Tpsvec& a) \ + { return _Tpsvec(srai(a.val, imm)); } + +OPENCV_HAL_IMPL_AVX_SHIFT_OP(v_uint16x16, v_int16x16, epi16, _mm256_srai_epi16) +OPENCV_HAL_IMPL_AVX_SHIFT_OP(v_uint32x8, v_int32x8, epi32, _mm256_srai_epi32) + +inline __m256i _mm256_srai_epi64xx(const __m256i a, int imm) +{ + __m256i d = _mm256_set1_epi64x((int64)1 << 63); + __m256i r = _mm256_srli_epi64(_mm256_add_epi64(a, d), imm); + return _mm256_sub_epi64(r, _mm256_srli_epi64(d, imm)); +} +OPENCV_HAL_IMPL_AVX_SHIFT_OP(v_uint64x4, v_int64x4, epi64, _mm256_srai_epi64xx) + + +/** Bitwise logic **/ +#define OPENCV_HAL_IMPL_AVX_LOGIC_OP(_Tpvec, suffix, not_const) \ + OPENCV_HAL_IMPL_AVX_BIN_OP(&, _Tpvec, _mm256_and_##suffix) \ + OPENCV_HAL_IMPL_AVX_BIN_OP(|, _Tpvec, _mm256_or_##suffix) \ + OPENCV_HAL_IMPL_AVX_BIN_OP(^, _Tpvec, _mm256_xor_##suffix) \ + inline _Tpvec operator ~ (const _Tpvec& a) \ + { return _Tpvec(_mm256_xor_##suffix(a.val, not_const)); } + +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_uint8x32, si256, _mm256_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_int8x32, si256, _mm256_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_uint16x16, si256, _mm256_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_int16x16, si256, _mm256_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_uint32x8, si256, _mm256_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_int32x8, si256, _mm256_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_uint64x4, si256, _mm256_set1_epi64x(-1)) +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_int64x4, si256, _mm256_set1_epi64x(-1)) +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_float32x8, ps, _mm256_castsi256_ps(_mm256_set1_epi32(-1))) +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_float64x4, pd, _mm256_castsi256_pd(_mm256_set1_epi32(-1))) + +/** Select **/ +#define OPENCV_HAL_IMPL_AVX_SELECT(_Tpvec, suffix) \ + inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_mm256_blendv_##suffix(b.val, a.val, mask.val)); } + +OPENCV_HAL_IMPL_AVX_SELECT(v_uint8x32, epi8) +OPENCV_HAL_IMPL_AVX_SELECT(v_int8x32, epi8) +OPENCV_HAL_IMPL_AVX_SELECT(v_uint16x16, epi8) +OPENCV_HAL_IMPL_AVX_SELECT(v_int16x16, epi8) +OPENCV_HAL_IMPL_AVX_SELECT(v_uint32x8, epi8) +OPENCV_HAL_IMPL_AVX_SELECT(v_int32x8, epi8) +OPENCV_HAL_IMPL_AVX_SELECT(v_float32x8, ps) +OPENCV_HAL_IMPL_AVX_SELECT(v_float64x4, pd) + +/** Comparison **/ +#define OPENCV_HAL_IMPL_AVX_CMP_OP_OV(_Tpvec) \ + inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ + { return ~(a == b); } \ + inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \ + { return b > a; } \ + inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \ + { return ~(a < b); } \ + inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \ + { return b >= a; } + +#define OPENCV_HAL_IMPL_AVX_CMP_OP_INT(_Tpuvec, _Tpsvec, suffix, sbit) \ + inline _Tpuvec operator == (const _Tpuvec& a, const _Tpuvec& b) \ + { return _Tpuvec(_mm256_cmpeq_##suffix(a.val, b.val)); } \ + inline _Tpuvec operator > (const _Tpuvec& a, const _Tpuvec& b) \ + { \ + __m256i smask = _mm256_set1_##suffix(sbit); \ + return _Tpuvec(_mm256_cmpgt_##suffix( \ + _mm256_xor_si256(a.val, smask), \ + _mm256_xor_si256(b.val, smask))); \ + } \ + inline _Tpsvec operator == (const _Tpsvec& a, const _Tpsvec& b) \ + { return _Tpsvec(_mm256_cmpeq_##suffix(a.val, b.val)); } \ + inline _Tpsvec operator > (const _Tpsvec& a, const _Tpsvec& b) \ + { return _Tpsvec(_mm256_cmpgt_##suffix(a.val, b.val)); } \ + OPENCV_HAL_IMPL_AVX_CMP_OP_OV(_Tpuvec) \ + OPENCV_HAL_IMPL_AVX_CMP_OP_OV(_Tpsvec) + +OPENCV_HAL_IMPL_AVX_CMP_OP_INT(v_uint8x32, v_int8x32, epi8, (char)-128) +OPENCV_HAL_IMPL_AVX_CMP_OP_INT(v_uint16x16, v_int16x16, epi16, (short)-32768) +OPENCV_HAL_IMPL_AVX_CMP_OP_INT(v_uint32x8, v_int32x8, epi32, (int)0x80000000) + +#define OPENCV_HAL_IMPL_AVX_CMP_OP_64BIT(_Tpvec) \ + inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_mm256_cmpeq_epi64(a.val, b.val)); } \ + inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ + { return ~(a == b); } + +OPENCV_HAL_IMPL_AVX_CMP_OP_64BIT(v_uint64x4) +OPENCV_HAL_IMPL_AVX_CMP_OP_64BIT(v_int64x4) + +#define OPENCV_HAL_IMPL_AVX_CMP_FLT(bin_op, imm8, _Tpvec, suffix) \ + inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_mm256_cmp_##suffix(a.val, b.val, imm8)); } + +#define OPENCV_HAL_IMPL_AVX_CMP_OP_FLT(_Tpvec, suffix) \ + OPENCV_HAL_IMPL_AVX_CMP_FLT(==, _CMP_EQ_OQ, _Tpvec, suffix) \ + OPENCV_HAL_IMPL_AVX_CMP_FLT(!=, _CMP_NEQ_OQ, _Tpvec, suffix) \ + OPENCV_HAL_IMPL_AVX_CMP_FLT(<, _CMP_LT_OQ, _Tpvec, suffix) \ + OPENCV_HAL_IMPL_AVX_CMP_FLT(>, _CMP_GT_OQ, _Tpvec, suffix) \ + OPENCV_HAL_IMPL_AVX_CMP_FLT(<=, _CMP_LE_OQ, _Tpvec, suffix) \ + OPENCV_HAL_IMPL_AVX_CMP_FLT(>=, _CMP_GE_OQ, _Tpvec, suffix) + +OPENCV_HAL_IMPL_AVX_CMP_OP_FLT(v_float32x8, ps) +OPENCV_HAL_IMPL_AVX_CMP_OP_FLT(v_float64x4, pd) + +inline v_float32x8 v_not_nan(const v_float32x8& a) +{ return v_float32x8(_mm256_cmp_ps(a.val, a.val, _CMP_ORD_Q)); } +inline v_float64x4 v_not_nan(const v_float64x4& a) +{ return v_float64x4(_mm256_cmp_pd(a.val, a.val, _CMP_ORD_Q)); } + +/** min/max **/ +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_uint8x32, _mm256_min_epu8) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_uint8x32, _mm256_max_epu8) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_int8x32, _mm256_min_epi8) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_int8x32, _mm256_max_epi8) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_uint16x16, _mm256_min_epu16) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_uint16x16, _mm256_max_epu16) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_int16x16, _mm256_min_epi16) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_int16x16, _mm256_max_epi16) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_uint32x8, _mm256_min_epu32) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_uint32x8, _mm256_max_epu32) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_int32x8, _mm256_min_epi32) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_int32x8, _mm256_max_epi32) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_float32x8, _mm256_min_ps) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_float32x8, _mm256_max_ps) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_float64x4, _mm256_min_pd) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_float64x4, _mm256_max_pd) + +/** Rotate **/ +template +inline v_uint8x32 v_rotate_left(const v_uint8x32& a, const v_uint8x32& b) +{ + enum {IMM_R = (16 - imm) & 0xFF}; + enum {IMM_R2 = (32 - imm) & 0xFF}; + + if (imm == 0) return a; + if (imm == 32) return b; + if (imm > 32) return v_uint8x32(); + + __m256i swap = _mm256_permute2x128_si256(a.val, b.val, 0x03); + if (imm == 16) return v_uint8x32(swap); + if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(a.val, swap, IMM_R)); + return v_uint8x32(_mm256_alignr_epi8(swap, b.val, IMM_R2)); // imm < 32 +} + +template +inline v_uint8x32 v_rotate_right(const v_uint8x32& a, const v_uint8x32& b) +{ + enum {IMM_L = (imm - 16) & 0xFF}; + + if (imm == 0) return a; + if (imm == 32) return b; + if (imm > 32) return v_uint8x32(); + + __m256i swap = _mm256_permute2x128_si256(a.val, b.val, 0x21); + if (imm == 16) return v_uint8x32(swap); + if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(swap, a.val, imm)); + return v_uint8x32(_mm256_alignr_epi8(b.val, swap, IMM_L)); +} + +template +inline v_uint8x32 v_rotate_left(const v_uint8x32& a) +{ + enum {IMM_L = (imm - 16) & 0xFF}; + enum {IMM_R = (16 - imm) & 0xFF}; + + if (imm == 0) return a; + if (imm > 32) return v_uint8x32(); + + // ESAC control[3] ? [127:0] = 0 + __m256i swapz = _mm256_permute2x128_si256(a.val, a.val, _MM_SHUFFLE(0, 0, 2, 0)); + if (imm == 16) return v_uint8x32(swapz); + if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(a.val, swapz, IMM_R)); + return v_uint8x32(_mm256_slli_si256(swapz, IMM_L)); +} + +template +inline v_uint8x32 v_rotate_right(const v_uint8x32& a) +{ + enum {IMM_L = (imm - 16) & 0xFF}; + + if (imm == 0) return a; + if (imm > 32) return v_uint8x32(); + + // ESAC control[3] ? [127:0] = 0 + __m256i swapz = _mm256_permute2x128_si256(a.val, a.val, _MM_SHUFFLE(2, 0, 0, 1)); + if (imm == 16) return v_uint8x32(swapz); + if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(swapz, a.val, imm)); + return v_uint8x32(_mm256_srli_si256(swapz, IMM_L)); +} + +#define OPENCV_HAL_IMPL_AVX_ROTATE_CAST(intrin, _Tpvec, cast) \ + template \ + inline _Tpvec intrin(const _Tpvec& a, const _Tpvec& b) \ + { \ + enum {IMMxW = imm * sizeof(typename _Tpvec::lane_type)}; \ + v_uint8x32 ret = intrin(v_reinterpret_as_u8(a), \ + v_reinterpret_as_u8(b)); \ + return _Tpvec(cast(ret.val)); \ + } \ + template \ + inline _Tpvec intrin(const _Tpvec& a) \ + { \ + enum {IMMxW = imm * sizeof(typename _Tpvec::lane_type)}; \ + v_uint8x32 ret = intrin(v_reinterpret_as_u8(a)); \ + return _Tpvec(cast(ret.val)); \ + } + +#define OPENCV_HAL_IMPL_AVX_ROTATE(_Tpvec) \ + OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_left, _Tpvec, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_right, _Tpvec, OPENCV_HAL_NOP) + +OPENCV_HAL_IMPL_AVX_ROTATE(v_int8x32) +OPENCV_HAL_IMPL_AVX_ROTATE(v_uint16x16) +OPENCV_HAL_IMPL_AVX_ROTATE(v_int16x16) +OPENCV_HAL_IMPL_AVX_ROTATE(v_uint32x8) +OPENCV_HAL_IMPL_AVX_ROTATE(v_int32x8) +OPENCV_HAL_IMPL_AVX_ROTATE(v_uint64x4) +OPENCV_HAL_IMPL_AVX_ROTATE(v_int64x4) + +OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_left, v_float32x8, _mm256_castsi256_ps) +OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_right, v_float32x8, _mm256_castsi256_ps) +OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_left, v_float64x4, _mm256_castsi256_pd) +OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_right, v_float64x4, _mm256_castsi256_pd) + +/** Reverse **/ +inline v_uint8x32 v_reverse(const v_uint8x32 &a) +{ + static const __m256i perm = _mm256_setr_epi8( + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __m256i vec = _mm256_shuffle_epi8(a.val, perm); + return v_uint8x32(_mm256_permute2x128_si256(vec, vec, 1)); +} + +inline v_int8x32 v_reverse(const v_int8x32 &a) +{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); } + +inline v_uint16x16 v_reverse(const v_uint16x16 &a) +{ + static const __m256i perm = _mm256_setr_epi8( + 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, + 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1); + __m256i vec = _mm256_shuffle_epi8(a.val, perm); + return v_uint16x16(_mm256_permute2x128_si256(vec, vec, 1)); +} + +inline v_int16x16 v_reverse(const v_int16x16 &a) +{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } + +inline v_uint32x8 v_reverse(const v_uint32x8 &a) +{ + static const __m256i perm = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0); + return v_uint32x8(_mm256_permutevar8x32_epi32(a.val, perm)); +} + +inline v_int32x8 v_reverse(const v_int32x8 &a) +{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_float32x8 v_reverse(const v_float32x8 &a) +{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_uint64x4 v_reverse(const v_uint64x4 &a) +{ + return v_uint64x4(_mm256_permute4x64_epi64(a.val, _MM_SHUFFLE(0, 1, 2, 3))); +} + +inline v_int64x4 v_reverse(const v_int64x4 &a) +{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } + +inline v_float64x4 v_reverse(const v_float64x4 &a) +{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } + +////////// Reduce and mask ///////// + +/** Reduce **/ +inline unsigned v_reduce_sum(const v_uint8x32& a) +{ + __m256i half = _mm256_sad_epu8(a.val, _mm256_setzero_si256()); + __m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half)); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter))); +} +inline int v_reduce_sum(const v_int8x32& a) +{ + __m256i half = _mm256_sad_epu8(_mm256_xor_si256(a.val, _mm256_set1_epi8((schar)-128)), _mm256_setzero_si256()); + __m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half)); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter))) - 4096; +} +#define OPENCV_HAL_IMPL_AVX_REDUCE_32(_Tpvec, sctype, func, intrin) \ + inline sctype v_reduce_##func(const _Tpvec& a) \ + { \ + __m128i val = intrin(_v256_extract_low(a.val), _v256_extract_high(a.val)); \ + val = intrin(val, _mm_srli_si128(val,8)); \ + val = intrin(val, _mm_srli_si128(val,4)); \ + val = intrin(val, _mm_srli_si128(val,2)); \ + val = intrin(val, _mm_srli_si128(val,1)); \ + return (sctype)_mm_cvtsi128_si32(val); \ + } + +OPENCV_HAL_IMPL_AVX_REDUCE_32(v_uint8x32, uchar, min, _mm_min_epu8) +OPENCV_HAL_IMPL_AVX_REDUCE_32(v_int8x32, schar, min, _mm_min_epi8) +OPENCV_HAL_IMPL_AVX_REDUCE_32(v_uint8x32, uchar, max, _mm_max_epu8) +OPENCV_HAL_IMPL_AVX_REDUCE_32(v_int8x32, schar, max, _mm_max_epi8) + +#define OPENCV_HAL_IMPL_AVX_REDUCE_16(_Tpvec, sctype, func, intrin) \ + inline sctype v_reduce_##func(const _Tpvec& a) \ + { \ + __m128i v0 = _v256_extract_low(a.val); \ + __m128i v1 = _v256_extract_high(a.val); \ + v0 = intrin(v0, v1); \ + v0 = intrin(v0, _mm_srli_si128(v0, 8)); \ + v0 = intrin(v0, _mm_srli_si128(v0, 4)); \ + v0 = intrin(v0, _mm_srli_si128(v0, 2)); \ + return (sctype) _mm_cvtsi128_si32(v0); \ + } + +OPENCV_HAL_IMPL_AVX_REDUCE_16(v_uint16x16, ushort, min, _mm_min_epu16) +OPENCV_HAL_IMPL_AVX_REDUCE_16(v_int16x16, short, min, _mm_min_epi16) +OPENCV_HAL_IMPL_AVX_REDUCE_16(v_uint16x16, ushort, max, _mm_max_epu16) +OPENCV_HAL_IMPL_AVX_REDUCE_16(v_int16x16, short, max, _mm_max_epi16) + +#define OPENCV_HAL_IMPL_AVX_REDUCE_8(_Tpvec, sctype, func, intrin) \ + inline sctype v_reduce_##func(const _Tpvec& a) \ + { \ + __m128i v0 = _v256_extract_low(a.val); \ + __m128i v1 = _v256_extract_high(a.val); \ + v0 = intrin(v0, v1); \ + v0 = intrin(v0, _mm_srli_si128(v0, 8)); \ + v0 = intrin(v0, _mm_srli_si128(v0, 4)); \ + return (sctype) _mm_cvtsi128_si32(v0); \ + } + +OPENCV_HAL_IMPL_AVX_REDUCE_8(v_uint32x8, unsigned, min, _mm_min_epu32) +OPENCV_HAL_IMPL_AVX_REDUCE_8(v_int32x8, int, min, _mm_min_epi32) +OPENCV_HAL_IMPL_AVX_REDUCE_8(v_uint32x8, unsigned, max, _mm_max_epu32) +OPENCV_HAL_IMPL_AVX_REDUCE_8(v_int32x8, int, max, _mm_max_epi32) + +#define OPENCV_HAL_IMPL_AVX_REDUCE_FLT(func, intrin) \ + inline float v_reduce_##func(const v_float32x8& a) \ + { \ + __m128 v0 = _v256_extract_low(a.val); \ + __m128 v1 = _v256_extract_high(a.val); \ + v0 = intrin(v0, v1); \ + v0 = intrin(v0, _mm_permute_ps(v0, _MM_SHUFFLE(0, 0, 3, 2))); \ + v0 = intrin(v0, _mm_permute_ps(v0, _MM_SHUFFLE(0, 0, 0, 1))); \ + return _mm_cvtss_f32(v0); \ + } + +OPENCV_HAL_IMPL_AVX_REDUCE_FLT(min, _mm_min_ps) +OPENCV_HAL_IMPL_AVX_REDUCE_FLT(max, _mm_max_ps) + +inline int v_reduce_sum(const v_int32x8& a) +{ + __m256i s0 = _mm256_hadd_epi32(a.val, a.val); + s0 = _mm256_hadd_epi32(s0, s0); + + __m128i s1 = _v256_extract_high(s0); + s1 = _mm_add_epi32(_v256_extract_low(s0), s1); + + return _mm_cvtsi128_si32(s1); +} + +inline unsigned v_reduce_sum(const v_uint32x8& a) +{ return v_reduce_sum(v_reinterpret_as_s32(a)); } + +inline int v_reduce_sum(const v_int16x16& a) +{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); } +inline unsigned v_reduce_sum(const v_uint16x16& a) +{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); } + +inline float v_reduce_sum(const v_float32x8& a) +{ + __m256 s0 = _mm256_hadd_ps(a.val, a.val); + s0 = _mm256_hadd_ps(s0, s0); + + __m128 s1 = _v256_extract_high(s0); + s1 = _mm_add_ps(_v256_extract_low(s0), s1); + + return _mm_cvtss_f32(s1); +} + +inline uint64 v_reduce_sum(const v_uint64x4& a) +{ + uint64 CV_DECL_ALIGNED(32) idx[2]; + _mm_store_si128((__m128i*)idx, _mm_add_epi64(_v256_extract_low(a.val), _v256_extract_high(a.val))); + return idx[0] + idx[1]; +} +inline int64 v_reduce_sum(const v_int64x4& a) +{ + int64 CV_DECL_ALIGNED(32) idx[2]; + _mm_store_si128((__m128i*)idx, _mm_add_epi64(_v256_extract_low(a.val), _v256_extract_high(a.val))); + return idx[0] + idx[1]; +} +inline double v_reduce_sum(const v_float64x4& a) +{ + __m256d s0 = _mm256_hadd_pd(a.val, a.val); + return _mm_cvtsd_f64(_mm_add_pd(_v256_extract_low(s0), _v256_extract_high(s0))); +} + +inline v_float32x8 v_reduce_sum4(const v_float32x8& a, const v_float32x8& b, + const v_float32x8& c, const v_float32x8& d) +{ + __m256 ab = _mm256_hadd_ps(a.val, b.val); + __m256 cd = _mm256_hadd_ps(c.val, d.val); + return v_float32x8(_mm256_hadd_ps(ab, cd)); +} + +inline unsigned v_reduce_sad(const v_uint8x32& a, const v_uint8x32& b) +{ + __m256i half = _mm256_sad_epu8(a.val, b.val); + __m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half)); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter))); +} +inline unsigned v_reduce_sad(const v_int8x32& a, const v_int8x32& b) +{ + __m256i half = _mm256_set1_epi8(0x7f); + half = _mm256_sad_epu8(_mm256_add_epi8(a.val, half), _mm256_add_epi8(b.val, half)); + __m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half)); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter))); +} +inline unsigned v_reduce_sad(const v_uint16x16& a, const v_uint16x16& b) +{ + v_uint32x8 l, h; + v_expand(v_add_wrap(a - b, b - a), l, h); + return v_reduce_sum(l + h); +} +inline unsigned v_reduce_sad(const v_int16x16& a, const v_int16x16& b) +{ + v_uint32x8 l, h; + v_expand(v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b))), l, h); + return v_reduce_sum(l + h); +} +inline unsigned v_reduce_sad(const v_uint32x8& a, const v_uint32x8& b) +{ + return v_reduce_sum(v_max(a, b) - v_min(a, b)); +} +inline unsigned v_reduce_sad(const v_int32x8& a, const v_int32x8& b) +{ + v_int32x8 m = a < b; + return v_reduce_sum(v_reinterpret_as_u32(((a - b) ^ m) - m)); +} +inline float v_reduce_sad(const v_float32x8& a, const v_float32x8& b) +{ + return v_reduce_sum((a - b) & v_float32x8(_mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff)))); +} + +/** Popcount **/ +inline v_uint8x32 v_popcount(const v_uint8x32& a) +{ + __m256i _popcnt_table = _mm256_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4); + __m256i _popcnt_mask = _mm256_set1_epi8(0x0F); + return v_uint8x32(_mm256_add_epi8(_mm256_shuffle_epi8(_popcnt_table, _mm256_and_si256( a.val , _popcnt_mask)), + _mm256_shuffle_epi8(_popcnt_table, _mm256_and_si256(_mm256_srli_epi16(a.val, 4), _popcnt_mask)))); +} +inline v_uint16x16 v_popcount(const v_uint16x16& a) +{ + v_uint8x32 p = v_popcount(v_reinterpret_as_u8(a)); + p += v_rotate_right<1>(p); + return v_reinterpret_as_u16(p) & v256_setall_u16(0x00ff); +} +inline v_uint32x8 v_popcount(const v_uint32x8& a) +{ + v_uint8x32 p = v_popcount(v_reinterpret_as_u8(a)); + p += v_rotate_right<1>(p); + p += v_rotate_right<2>(p); + return v_reinterpret_as_u32(p) & v256_setall_u32(0x000000ff); +} +inline v_uint64x4 v_popcount(const v_uint64x4& a) +{ + return v_uint64x4(_mm256_sad_epu8(v_popcount(v_reinterpret_as_u8(a)).val, _mm256_setzero_si256())); +} +inline v_uint8x32 v_popcount(const v_int8x32& a) +{ return v_popcount(v_reinterpret_as_u8(a)); } +inline v_uint16x16 v_popcount(const v_int16x16& a) +{ return v_popcount(v_reinterpret_as_u16(a)); } +inline v_uint32x8 v_popcount(const v_int32x8& a) +{ return v_popcount(v_reinterpret_as_u32(a)); } +inline v_uint64x4 v_popcount(const v_int64x4& a) +{ return v_popcount(v_reinterpret_as_u64(a)); } + +/** Mask **/ +inline int v_signmask(const v_int8x32& a) +{ return _mm256_movemask_epi8(a.val); } +inline int v_signmask(const v_uint8x32& a) +{ return v_signmask(v_reinterpret_as_s8(a)); } + +inline int v_signmask(const v_int16x16& a) +{ return v_signmask(v_pack(a, a)) & 0xFFFF; } +inline int v_signmask(const v_uint16x16& a) +{ return v_signmask(v_reinterpret_as_s16(a)); } + +inline int v_signmask(const v_float32x8& a) +{ return _mm256_movemask_ps(a.val); } +inline int v_signmask(const v_float64x4& a) +{ return _mm256_movemask_pd(a.val); } + +inline int v_signmask(const v_int32x8& a) +{ return v_signmask(v_reinterpret_as_f32(a)); } +inline int v_signmask(const v_uint32x8& a) +{ return v_signmask(v_reinterpret_as_f32(a)); } + +inline int v_signmask(const v_int64x4& a) +{ return v_signmask(v_reinterpret_as_f64(a)); } +inline int v_signmask(const v_uint64x4& a) +{ return v_signmask(v_reinterpret_as_f64(a)); } + +inline int v_scan_forward(const v_int8x32& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); } +inline int v_scan_forward(const v_uint8x32& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); } +inline int v_scan_forward(const v_int16x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; } +inline int v_scan_forward(const v_uint16x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; } +inline int v_scan_forward(const v_int32x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; } +inline int v_scan_forward(const v_uint32x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; } +inline int v_scan_forward(const v_float32x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; } +inline int v_scan_forward(const v_int64x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } +inline int v_scan_forward(const v_uint64x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } +inline int v_scan_forward(const v_float64x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } + +/** Checks **/ +#define OPENCV_HAL_IMPL_AVX_CHECK(_Tpvec, allmask) \ + inline bool v_check_all(const _Tpvec& a) { return v_signmask(a) == allmask; } \ + inline bool v_check_any(const _Tpvec& a) { return v_signmask(a) != 0; } +OPENCV_HAL_IMPL_AVX_CHECK(v_uint8x32, -1) +OPENCV_HAL_IMPL_AVX_CHECK(v_int8x32, -1) +OPENCV_HAL_IMPL_AVX_CHECK(v_uint32x8, 255) +OPENCV_HAL_IMPL_AVX_CHECK(v_int32x8, 255) +OPENCV_HAL_IMPL_AVX_CHECK(v_uint64x4, 15) +OPENCV_HAL_IMPL_AVX_CHECK(v_int64x4, 15) +OPENCV_HAL_IMPL_AVX_CHECK(v_float32x8, 255) +OPENCV_HAL_IMPL_AVX_CHECK(v_float64x4, 15) + +#define OPENCV_HAL_IMPL_AVX_CHECK_SHORT(_Tpvec) \ + inline bool v_check_all(const _Tpvec& a) { return (v_signmask(v_reinterpret_as_s8(a)) & 0xaaaaaaaa) == 0xaaaaaaaa; } \ + inline bool v_check_any(const _Tpvec& a) { return (v_signmask(v_reinterpret_as_s8(a)) & 0xaaaaaaaa) != 0; } +OPENCV_HAL_IMPL_AVX_CHECK_SHORT(v_uint16x16) +OPENCV_HAL_IMPL_AVX_CHECK_SHORT(v_int16x16) + +////////// Other math ///////// + +/** Some frequent operations **/ +#define OPENCV_HAL_IMPL_AVX_MULADD(_Tpvec, suffix) \ + inline _Tpvec v_fma(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \ + { return _Tpvec(_mm256_fmadd_##suffix(a.val, b.val, c.val)); } \ + inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \ + { return _Tpvec(_mm256_fmadd_##suffix(a.val, b.val, c.val)); } \ + inline _Tpvec v_sqrt(const _Tpvec& x) \ + { return _Tpvec(_mm256_sqrt_##suffix(x.val)); } \ + inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b) \ + { return v_fma(a, a, b * b); } \ + inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b) \ + { return v_sqrt(v_fma(a, a, b*b)); } + +OPENCV_HAL_IMPL_AVX_MULADD(v_float32x8, ps) +OPENCV_HAL_IMPL_AVX_MULADD(v_float64x4, pd) + +inline v_int32x8 v_fma(const v_int32x8& a, const v_int32x8& b, const v_int32x8& c) +{ + return a * b + c; +} + +inline v_int32x8 v_muladd(const v_int32x8& a, const v_int32x8& b, const v_int32x8& c) +{ + return v_fma(a, b, c); +} + +inline v_float32x8 v_invsqrt(const v_float32x8& x) +{ + v_float32x8 half = x * v256_setall_f32(0.5); + v_float32x8 t = v_float32x8(_mm256_rsqrt_ps(x.val)); + // todo: _mm256_fnmsub_ps + t *= v256_setall_f32(1.5) - ((t * t) * half); + return t; +} + +inline v_float64x4 v_invsqrt(const v_float64x4& x) +{ + return v256_setall_f64(1.) / v_sqrt(x); +} + +/** Absolute values **/ +#define OPENCV_HAL_IMPL_AVX_ABS(_Tpvec, suffix) \ + inline v_u##_Tpvec v_abs(const v_##_Tpvec& x) \ + { return v_u##_Tpvec(_mm256_abs_##suffix(x.val)); } + +OPENCV_HAL_IMPL_AVX_ABS(int8x32, epi8) +OPENCV_HAL_IMPL_AVX_ABS(int16x16, epi16) +OPENCV_HAL_IMPL_AVX_ABS(int32x8, epi32) + +inline v_float32x8 v_abs(const v_float32x8& x) +{ return x & v_float32x8(_mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff))); } +inline v_float64x4 v_abs(const v_float64x4& x) +{ return x & v_float64x4(_mm256_castsi256_pd(_mm256_srli_epi64(_mm256_set1_epi64x(-1), 1))); } + +/** Absolute difference **/ +inline v_uint8x32 v_absdiff(const v_uint8x32& a, const v_uint8x32& b) +{ return v_add_wrap(a - b, b - a); } +inline v_uint16x16 v_absdiff(const v_uint16x16& a, const v_uint16x16& b) +{ return v_add_wrap(a - b, b - a); } +inline v_uint32x8 v_absdiff(const v_uint32x8& a, const v_uint32x8& b) +{ return v_max(a, b) - v_min(a, b); } + +inline v_uint8x32 v_absdiff(const v_int8x32& a, const v_int8x32& b) +{ + v_int8x32 d = v_sub_wrap(a, b); + v_int8x32 m = a < b; + return v_reinterpret_as_u8(v_sub_wrap(d ^ m, m)); +} + +inline v_uint16x16 v_absdiff(const v_int16x16& a, const v_int16x16& b) +{ return v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b))); } + +inline v_uint32x8 v_absdiff(const v_int32x8& a, const v_int32x8& b) +{ + v_int32x8 d = a - b; + v_int32x8 m = a < b; + return v_reinterpret_as_u32((d ^ m) - m); +} + +inline v_float32x8 v_absdiff(const v_float32x8& a, const v_float32x8& b) +{ return v_abs(a - b); } + +inline v_float64x4 v_absdiff(const v_float64x4& a, const v_float64x4& b) +{ return v_abs(a - b); } + +/** Saturating absolute difference **/ +inline v_int8x32 v_absdiffs(const v_int8x32& a, const v_int8x32& b) +{ + v_int8x32 d = a - b; + v_int8x32 m = a < b; + return (d ^ m) - m; +} +inline v_int16x16 v_absdiffs(const v_int16x16& a, const v_int16x16& b) +{ return v_max(a, b) - v_min(a, b); } + +////////// Conversions ///////// + +/** Rounding **/ +inline v_int32x8 v_round(const v_float32x8& a) +{ return v_int32x8(_mm256_cvtps_epi32(a.val)); } + +inline v_int32x8 v_round(const v_float64x4& a) +{ return v_int32x8(_mm256_castsi128_si256(_mm256_cvtpd_epi32(a.val))); } + +inline v_int32x8 v_round(const v_float64x4& a, const v_float64x4& b) +{ + __m128i ai = _mm256_cvtpd_epi32(a.val), bi = _mm256_cvtpd_epi32(b.val); + return v_int32x8(_v256_combine(ai, bi)); +} + +inline v_int32x8 v_trunc(const v_float32x8& a) +{ return v_int32x8(_mm256_cvttps_epi32(a.val)); } + +inline v_int32x8 v_trunc(const v_float64x4& a) +{ return v_int32x8(_mm256_castsi128_si256(_mm256_cvttpd_epi32(a.val))); } + +inline v_int32x8 v_floor(const v_float32x8& a) +{ return v_int32x8(_mm256_cvttps_epi32(_mm256_floor_ps(a.val))); } + +inline v_int32x8 v_floor(const v_float64x4& a) +{ return v_trunc(v_float64x4(_mm256_floor_pd(a.val))); } + +inline v_int32x8 v_ceil(const v_float32x8& a) +{ return v_int32x8(_mm256_cvttps_epi32(_mm256_ceil_ps(a.val))); } + +inline v_int32x8 v_ceil(const v_float64x4& a) +{ return v_trunc(v_float64x4(_mm256_ceil_pd(a.val))); } + +/** To float **/ +inline v_float32x8 v_cvt_f32(const v_int32x8& a) +{ return v_float32x8(_mm256_cvtepi32_ps(a.val)); } + +inline v_float32x8 v_cvt_f32(const v_float64x4& a) +{ return v_float32x8(_mm256_castps128_ps256(_mm256_cvtpd_ps(a.val))); } + +inline v_float32x8 v_cvt_f32(const v_float64x4& a, const v_float64x4& b) +{ + __m128 af = _mm256_cvtpd_ps(a.val), bf = _mm256_cvtpd_ps(b.val); + return v_float32x8(_v256_combine(af, bf)); +} + +inline v_float64x4 v_cvt_f64(const v_int32x8& a) +{ return v_float64x4(_mm256_cvtepi32_pd(_v256_extract_low(a.val))); } + +inline v_float64x4 v_cvt_f64_high(const v_int32x8& a) +{ return v_float64x4(_mm256_cvtepi32_pd(_v256_extract_high(a.val))); } + +inline v_float64x4 v_cvt_f64(const v_float32x8& a) +{ return v_float64x4(_mm256_cvtps_pd(_v256_extract_low(a.val))); } + +inline v_float64x4 v_cvt_f64_high(const v_float32x8& a) +{ return v_float64x4(_mm256_cvtps_pd(_v256_extract_high(a.val))); } + +// from (Mysticial and wim) https://stackoverflow.com/q/41144668 +inline v_float64x4 v_cvt_f64(const v_int64x4& v) +{ + // constants encoded as floating-point + __m256i magic_i_lo = _mm256_set1_epi64x(0x4330000000000000); // 2^52 + __m256i magic_i_hi32 = _mm256_set1_epi64x(0x4530000080000000); // 2^84 + 2^63 + __m256i magic_i_all = _mm256_set1_epi64x(0x4530000080100000); // 2^84 + 2^63 + 2^52 + __m256d magic_d_all = _mm256_castsi256_pd(magic_i_all); + + // Blend the 32 lowest significant bits of v with magic_int_lo + __m256i v_lo = _mm256_blend_epi32(magic_i_lo, v.val, 0x55); + // Extract the 32 most significant bits of v + __m256i v_hi = _mm256_srli_epi64(v.val, 32); + // Flip the msb of v_hi and blend with 0x45300000 + v_hi = _mm256_xor_si256(v_hi, magic_i_hi32); + // Compute in double precision + __m256d v_hi_dbl = _mm256_sub_pd(_mm256_castsi256_pd(v_hi), magic_d_all); + // (v_hi - magic_d_all) + v_lo Do not assume associativity of floating point addition + __m256d result = _mm256_add_pd(v_hi_dbl, _mm256_castsi256_pd(v_lo)); + return v_float64x4(result); +} + +////////////// Lookup table access //////////////////// + +inline v_int8x32 v256_lut(const schar* tab, const int* idx) +{ + return v_int8x32(_mm256_setr_epi8(tab[idx[ 0]], tab[idx[ 1]], tab[idx[ 2]], tab[idx[ 3]], tab[idx[ 4]], tab[idx[ 5]], tab[idx[ 6]], tab[idx[ 7]], + tab[idx[ 8]], tab[idx[ 9]], tab[idx[10]], tab[idx[11]], tab[idx[12]], tab[idx[13]], tab[idx[14]], tab[idx[15]], + tab[idx[16]], tab[idx[17]], tab[idx[18]], tab[idx[19]], tab[idx[20]], tab[idx[21]], tab[idx[22]], tab[idx[23]], + tab[idx[24]], tab[idx[25]], tab[idx[26]], tab[idx[27]], tab[idx[28]], tab[idx[29]], tab[idx[30]], tab[idx[31]])); +} +inline v_int8x32 v256_lut_pairs(const schar* tab, const int* idx) +{ + return v_int8x32(_mm256_setr_epi16(*(const short*)(tab + idx[ 0]), *(const short*)(tab + idx[ 1]), *(const short*)(tab + idx[ 2]), *(const short*)(tab + idx[ 3]), + *(const short*)(tab + idx[ 4]), *(const short*)(tab + idx[ 5]), *(const short*)(tab + idx[ 6]), *(const short*)(tab + idx[ 7]), + *(const short*)(tab + idx[ 8]), *(const short*)(tab + idx[ 9]), *(const short*)(tab + idx[10]), *(const short*)(tab + idx[11]), + *(const short*)(tab + idx[12]), *(const short*)(tab + idx[13]), *(const short*)(tab + idx[14]), *(const short*)(tab + idx[15]))); +} +inline v_int8x32 v256_lut_quads(const schar* tab, const int* idx) +{ + return v_int8x32(_mm256_i32gather_epi32((const int*)tab, _mm256_loadu_si256((const __m256i*)idx), 1)); +} +inline v_uint8x32 v256_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v256_lut((const schar *)tab, idx)); } +inline v_uint8x32 v256_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v256_lut_pairs((const schar *)tab, idx)); } +inline v_uint8x32 v256_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v256_lut_quads((const schar *)tab, idx)); } + +inline v_int16x16 v256_lut(const short* tab, const int* idx) +{ + return v_int16x16(_mm256_setr_epi16(tab[idx[0]], tab[idx[1]], tab[idx[ 2]], tab[idx[ 3]], tab[idx[ 4]], tab[idx[ 5]], tab[idx[ 6]], tab[idx[ 7]], + tab[idx[8]], tab[idx[9]], tab[idx[10]], tab[idx[11]], tab[idx[12]], tab[idx[13]], tab[idx[14]], tab[idx[15]])); +} +inline v_int16x16 v256_lut_pairs(const short* tab, const int* idx) +{ + return v_int16x16(_mm256_i32gather_epi32((const int*)tab, _mm256_loadu_si256((const __m256i*)idx), 2)); +} +inline v_int16x16 v256_lut_quads(const short* tab, const int* idx) +{ +#if defined(__GNUC__) + return v_int16x16(_mm256_i32gather_epi64((const long long int*)tab, _mm_loadu_si128((const __m128i*)idx), 2));//Looks like intrinsic has wrong definition +#else + return v_int16x16(_mm256_i32gather_epi64((const int64*)tab, _mm_loadu_si128((const __m128i*)idx), 2)); +#endif +} +inline v_uint16x16 v256_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v256_lut((const short *)tab, idx)); } +inline v_uint16x16 v256_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v256_lut_pairs((const short *)tab, idx)); } +inline v_uint16x16 v256_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v256_lut_quads((const short *)tab, idx)); } + +inline v_int32x8 v256_lut(const int* tab, const int* idx) +{ + return v_int32x8(_mm256_i32gather_epi32(tab, _mm256_loadu_si256((const __m256i*)idx), 4)); +} +inline v_int32x8 v256_lut_pairs(const int* tab, const int* idx) +{ +#if defined(__GNUC__) + return v_int32x8(_mm256_i32gather_epi64((const long long int*)tab, _mm_loadu_si128((const __m128i*)idx), 4)); +#else + return v_int32x8(_mm256_i32gather_epi64((const int64*)tab, _mm_loadu_si128((const __m128i*)idx), 4)); +#endif +} +inline v_int32x8 v256_lut_quads(const int* tab, const int* idx) +{ + return v_int32x8(_v256_combine(_mm_loadu_si128((const __m128i*)(tab + idx[0])), _mm_loadu_si128((const __m128i*)(tab + idx[1])))); +} +inline v_uint32x8 v256_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v256_lut((const int *)tab, idx)); } +inline v_uint32x8 v256_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v256_lut_pairs((const int *)tab, idx)); } +inline v_uint32x8 v256_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v256_lut_quads((const int *)tab, idx)); } + +inline v_int64x4 v256_lut(const int64* tab, const int* idx) +{ +#if defined(__GNUC__) + return v_int64x4(_mm256_i32gather_epi64((const long long int*)tab, _mm_loadu_si128((const __m128i*)idx), 8)); +#else + return v_int64x4(_mm256_i32gather_epi64(tab, _mm_loadu_si128((const __m128i*)idx), 8)); +#endif +} +inline v_int64x4 v256_lut_pairs(const int64* tab, const int* idx) +{ + return v_int64x4(_v256_combine(_mm_loadu_si128((const __m128i*)(tab + idx[0])), _mm_loadu_si128((const __m128i*)(tab + idx[1])))); +} +inline v_uint64x4 v256_lut(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v256_lut((const int64 *)tab, idx)); } +inline v_uint64x4 v256_lut_pairs(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v256_lut_pairs((const int64 *)tab, idx)); } + +inline v_float32x8 v256_lut(const float* tab, const int* idx) +{ + return v_float32x8(_mm256_i32gather_ps(tab, _mm256_loadu_si256((const __m256i*)idx), 4)); +} +inline v_float32x8 v256_lut_pairs(const float* tab, const int* idx) { return v_reinterpret_as_f32(v256_lut_pairs((const int *)tab, idx)); } +inline v_float32x8 v256_lut_quads(const float* tab, const int* idx) { return v_reinterpret_as_f32(v256_lut_quads((const int *)tab, idx)); } + +inline v_float64x4 v256_lut(const double* tab, const int* idx) +{ + return v_float64x4(_mm256_i32gather_pd(tab, _mm_loadu_si128((const __m128i*)idx), 8)); +} +inline v_float64x4 v256_lut_pairs(const double* tab, const int* idx) { return v_float64x4(_v256_combine(_mm_loadu_pd(tab + idx[0]), _mm_loadu_pd(tab + idx[1]))); } + +inline v_int32x8 v_lut(const int* tab, const v_int32x8& idxvec) +{ + return v_int32x8(_mm256_i32gather_epi32(tab, idxvec.val, 4)); +} + +inline v_uint32x8 v_lut(const unsigned* tab, const v_int32x8& idxvec) +{ + return v_reinterpret_as_u32(v_lut((const int *)tab, idxvec)); +} + +inline v_float32x8 v_lut(const float* tab, const v_int32x8& idxvec) +{ + return v_float32x8(_mm256_i32gather_ps(tab, idxvec.val, 4)); +} + +inline v_float64x4 v_lut(const double* tab, const v_int32x8& idxvec) +{ + return v_float64x4(_mm256_i32gather_pd(tab, _mm256_castsi256_si128(idxvec.val), 8)); +} + +inline void v_lut_deinterleave(const float* tab, const v_int32x8& idxvec, v_float32x8& x, v_float32x8& y) +{ + int CV_DECL_ALIGNED(32) idx[8]; + v_store_aligned(idx, idxvec); + __m128 z = _mm_setzero_ps(); + __m128 xy01, xy45, xy23, xy67; + xy01 = _mm_loadl_pi(z, (const __m64*)(tab + idx[0])); + xy01 = _mm_loadh_pi(xy01, (const __m64*)(tab + idx[1])); + xy45 = _mm_loadl_pi(z, (const __m64*)(tab + idx[4])); + xy45 = _mm_loadh_pi(xy45, (const __m64*)(tab + idx[5])); + __m256 xy0145 = _v256_combine(xy01, xy45); + xy23 = _mm_loadl_pi(z, (const __m64*)(tab + idx[2])); + xy23 = _mm_loadh_pi(xy23, (const __m64*)(tab + idx[3])); + xy67 = _mm_loadl_pi(z, (const __m64*)(tab + idx[6])); + xy67 = _mm_loadh_pi(xy67, (const __m64*)(tab + idx[7])); + __m256 xy2367 = _v256_combine(xy23, xy67); + + __m256 xxyy0145 = _mm256_unpacklo_ps(xy0145, xy2367); + __m256 xxyy2367 = _mm256_unpackhi_ps(xy0145, xy2367); + + x = v_float32x8(_mm256_unpacklo_ps(xxyy0145, xxyy2367)); + y = v_float32x8(_mm256_unpackhi_ps(xxyy0145, xxyy2367)); +} + +inline void v_lut_deinterleave(const double* tab, const v_int32x8& idxvec, v_float64x4& x, v_float64x4& y) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_low(idx, idxvec); + __m128d xy0 = _mm_loadu_pd(tab + idx[0]); + __m128d xy2 = _mm_loadu_pd(tab + idx[2]); + __m128d xy1 = _mm_loadu_pd(tab + idx[1]); + __m128d xy3 = _mm_loadu_pd(tab + idx[3]); + __m256d xy02 = _v256_combine(xy0, xy2); + __m256d xy13 = _v256_combine(xy1, xy3); + + x = v_float64x4(_mm256_unpacklo_pd(xy02, xy13)); + y = v_float64x4(_mm256_unpackhi_pd(xy02, xy13)); +} + +inline v_int8x32 v_interleave_pairs(const v_int8x32& vec) +{ + return v_int8x32(_mm256_shuffle_epi8(vec.val, _mm256_set_epi64x(0x0f0d0e0c0b090a08, 0x0705060403010200, 0x0f0d0e0c0b090a08, 0x0705060403010200))); +} +inline v_uint8x32 v_interleave_pairs(const v_uint8x32& vec) { return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); } +inline v_int8x32 v_interleave_quads(const v_int8x32& vec) +{ + return v_int8x32(_mm256_shuffle_epi8(vec.val, _mm256_set_epi64x(0x0f0b0e0a0d090c08, 0x0703060205010400, 0x0f0b0e0a0d090c08, 0x0703060205010400))); +} +inline v_uint8x32 v_interleave_quads(const v_uint8x32& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); } + +inline v_int16x16 v_interleave_pairs(const v_int16x16& vec) +{ + return v_int16x16(_mm256_shuffle_epi8(vec.val, _mm256_set_epi64x(0x0f0e0b0a0d0c0908, 0x0706030205040100, 0x0f0e0b0a0d0c0908, 0x0706030205040100))); +} +inline v_uint16x16 v_interleave_pairs(const v_uint16x16& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); } +inline v_int16x16 v_interleave_quads(const v_int16x16& vec) +{ + return v_int16x16(_mm256_shuffle_epi8(vec.val, _mm256_set_epi64x(0x0f0e07060d0c0504, 0x0b0a030209080100, 0x0f0e07060d0c0504, 0x0b0a030209080100))); +} +inline v_uint16x16 v_interleave_quads(const v_uint16x16& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); } + +inline v_int32x8 v_interleave_pairs(const v_int32x8& vec) +{ + return v_int32x8(_mm256_shuffle_epi32(vec.val, _MM_SHUFFLE(3, 1, 2, 0))); +} +inline v_uint32x8 v_interleave_pairs(const v_uint32x8& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } +inline v_float32x8 v_interleave_pairs(const v_float32x8& vec) { return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } + +inline v_int8x32 v_pack_triplets(const v_int8x32& vec) +{ + return v_int8x32(_mm256_permutevar8x32_epi32(_mm256_shuffle_epi8(vec.val, _mm256_broadcastsi128_si256(_mm_set_epi64x(0xffffff0f0e0d0c0a, 0x0908060504020100))), + _mm256_set_epi64x(0x0000000700000007, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000))); +} +inline v_uint8x32 v_pack_triplets(const v_uint8x32& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); } + +inline v_int16x16 v_pack_triplets(const v_int16x16& vec) +{ + return v_int16x16(_mm256_permutevar8x32_epi32(_mm256_shuffle_epi8(vec.val, _mm256_broadcastsi128_si256(_mm_set_epi64x(0xffff0f0e0d0c0b0a, 0x0908050403020100))), + _mm256_set_epi64x(0x0000000700000007, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000))); +} +inline v_uint16x16 v_pack_triplets(const v_uint16x16& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); } + +inline v_int32x8 v_pack_triplets(const v_int32x8& vec) +{ + return v_int32x8(_mm256_permutevar8x32_epi32(vec.val, _mm256_set_epi64x(0x0000000700000007, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000))); +} +inline v_uint32x8 v_pack_triplets(const v_uint32x8& vec) { return v_reinterpret_as_u32(v_pack_triplets(v_reinterpret_as_s32(vec))); } +inline v_float32x8 v_pack_triplets(const v_float32x8& vec) +{ + return v_float32x8(_mm256_permutevar8x32_ps(vec.val, _mm256_set_epi64x(0x0000000700000007, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000))); +} + +////////// Matrix operations ///////// + +//////// Dot Product //////// + +// 16 >> 32 +inline v_int32x8 v_dotprod(const v_int16x16& a, const v_int16x16& b) +{ return v_int32x8(_mm256_madd_epi16(a.val, b.val)); } +inline v_int32x8 v_dotprod(const v_int16x16& a, const v_int16x16& b, const v_int32x8& c) +{ return v_dotprod(a, b) + c; } + +// 32 >> 64 +inline v_int64x4 v_dotprod(const v_int32x8& a, const v_int32x8& b) +{ + __m256i even = _mm256_mul_epi32(a.val, b.val); + __m256i odd = _mm256_mul_epi32(_mm256_srli_epi64(a.val, 32), _mm256_srli_epi64(b.val, 32)); + return v_int64x4(_mm256_add_epi64(even, odd)); +} +inline v_int64x4 v_dotprod(const v_int32x8& a, const v_int32x8& b, const v_int64x4& c) +{ return v_dotprod(a, b) + c; } + +// 8 >> 32 +inline v_uint32x8 v_dotprod_expand(const v_uint8x32& a, const v_uint8x32& b) +{ + __m256i even_m = _mm256_set1_epi32(0xFF00FF00); + __m256i even_a = _mm256_blendv_epi8(a.val, _mm256_setzero_si256(), even_m); + __m256i odd_a = _mm256_srli_epi16(a.val, 8); + + __m256i even_b = _mm256_blendv_epi8(b.val, _mm256_setzero_si256(), even_m); + __m256i odd_b = _mm256_srli_epi16(b.val, 8); + + __m256i prod0 = _mm256_madd_epi16(even_a, even_b); + __m256i prod1 = _mm256_madd_epi16(odd_a, odd_b); + return v_uint32x8(_mm256_add_epi32(prod0, prod1)); +} +inline v_uint32x8 v_dotprod_expand(const v_uint8x32& a, const v_uint8x32& b, const v_uint32x8& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int32x8 v_dotprod_expand(const v_int8x32& a, const v_int8x32& b) +{ + __m256i even_a = _mm256_srai_epi16(_mm256_bslli_epi128(a.val, 1), 8); + __m256i odd_a = _mm256_srai_epi16(a.val, 8); + + __m256i even_b = _mm256_srai_epi16(_mm256_bslli_epi128(b.val, 1), 8); + __m256i odd_b = _mm256_srai_epi16(b.val, 8); + + __m256i prod0 = _mm256_madd_epi16(even_a, even_b); + __m256i prod1 = _mm256_madd_epi16(odd_a, odd_b); + return v_int32x8(_mm256_add_epi32(prod0, prod1)); +} +inline v_int32x8 v_dotprod_expand(const v_int8x32& a, const v_int8x32& b, const v_int32x8& c) +{ return v_dotprod_expand(a, b) + c; } + +// 16 >> 64 +inline v_uint64x4 v_dotprod_expand(const v_uint16x16& a, const v_uint16x16& b) +{ + __m256i mullo = _mm256_mullo_epi16(a.val, b.val); + __m256i mulhi = _mm256_mulhi_epu16(a.val, b.val); + __m256i mul0 = _mm256_unpacklo_epi16(mullo, mulhi); + __m256i mul1 = _mm256_unpackhi_epi16(mullo, mulhi); + + __m256i p02 = _mm256_blend_epi32(mul0, _mm256_setzero_si256(), 0xAA); + __m256i p13 = _mm256_srli_epi64(mul0, 32); + __m256i p46 = _mm256_blend_epi32(mul1, _mm256_setzero_si256(), 0xAA); + __m256i p57 = _mm256_srli_epi64(mul1, 32); + + __m256i p15_ = _mm256_add_epi64(p02, p13); + __m256i p9d_ = _mm256_add_epi64(p46, p57); + + return v_uint64x4(_mm256_add_epi64( + _mm256_unpacklo_epi64(p15_, p9d_), + _mm256_unpackhi_epi64(p15_, p9d_) + )); +} +inline v_uint64x4 v_dotprod_expand(const v_uint16x16& a, const v_uint16x16& b, const v_uint64x4& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int64x4 v_dotprod_expand(const v_int16x16& a, const v_int16x16& b) +{ + __m256i prod = _mm256_madd_epi16(a.val, b.val); + __m256i sign = _mm256_srai_epi32(prod, 31); + + __m256i lo = _mm256_unpacklo_epi32(prod, sign); + __m256i hi = _mm256_unpackhi_epi32(prod, sign); + + return v_int64x4(_mm256_add_epi64( + _mm256_unpacklo_epi64(lo, hi), + _mm256_unpackhi_epi64(lo, hi) + )); +} +inline v_int64x4 v_dotprod_expand(const v_int16x16& a, const v_int16x16& b, const v_int64x4& c) +{ return v_dotprod_expand(a, b) + c; } + +// 32 >> 64f +inline v_float64x4 v_dotprod_expand(const v_int32x8& a, const v_int32x8& b) +{ return v_cvt_f64(v_dotprod(a, b)); } +inline v_float64x4 v_dotprod_expand(const v_int32x8& a, const v_int32x8& b, const v_float64x4& c) +{ return v_dotprod_expand(a, b) + c; } + +//////// Fast Dot Product //////// + +// 16 >> 32 +inline v_int32x8 v_dotprod_fast(const v_int16x16& a, const v_int16x16& b) +{ return v_dotprod(a, b); } +inline v_int32x8 v_dotprod_fast(const v_int16x16& a, const v_int16x16& b, const v_int32x8& c) +{ return v_dotprod(a, b, c); } + +// 32 >> 64 +inline v_int64x4 v_dotprod_fast(const v_int32x8& a, const v_int32x8& b) +{ return v_dotprod(a, b); } +inline v_int64x4 v_dotprod_fast(const v_int32x8& a, const v_int32x8& b, const v_int64x4& c) +{ return v_dotprod(a, b, c); } + +// 8 >> 32 +inline v_uint32x8 v_dotprod_expand_fast(const v_uint8x32& a, const v_uint8x32& b) +{ return v_dotprod_expand(a, b); } +inline v_uint32x8 v_dotprod_expand_fast(const v_uint8x32& a, const v_uint8x32& b, const v_uint32x8& c) +{ return v_dotprod_expand(a, b, c); } + +inline v_int32x8 v_dotprod_expand_fast(const v_int8x32& a, const v_int8x32& b) +{ return v_dotprod_expand(a, b); } +inline v_int32x8 v_dotprod_expand_fast(const v_int8x32& a, const v_int8x32& b, const v_int32x8& c) +{ return v_dotprod_expand(a, b, c); } + +// 16 >> 64 +inline v_uint64x4 v_dotprod_expand_fast(const v_uint16x16& a, const v_uint16x16& b) +{ + __m256i mullo = _mm256_mullo_epi16(a.val, b.val); + __m256i mulhi = _mm256_mulhi_epu16(a.val, b.val); + __m256i mul0 = _mm256_unpacklo_epi16(mullo, mulhi); + __m256i mul1 = _mm256_unpackhi_epi16(mullo, mulhi); + + __m256i p02 = _mm256_blend_epi32(mul0, _mm256_setzero_si256(), 0xAA); + __m256i p13 = _mm256_srli_epi64(mul0, 32); + __m256i p46 = _mm256_blend_epi32(mul1, _mm256_setzero_si256(), 0xAA); + __m256i p57 = _mm256_srli_epi64(mul1, 32); + + __m256i p15_ = _mm256_add_epi64(p02, p13); + __m256i p9d_ = _mm256_add_epi64(p46, p57); + + return v_uint64x4(_mm256_add_epi64(p15_, p9d_)); +} +inline v_uint64x4 v_dotprod_expand_fast(const v_uint16x16& a, const v_uint16x16& b, const v_uint64x4& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +inline v_int64x4 v_dotprod_expand_fast(const v_int16x16& a, const v_int16x16& b) +{ + __m256i prod = _mm256_madd_epi16(a.val, b.val); + __m256i sign = _mm256_srai_epi32(prod, 31); + __m256i lo = _mm256_unpacklo_epi32(prod, sign); + __m256i hi = _mm256_unpackhi_epi32(prod, sign); + return v_int64x4(_mm256_add_epi64(lo, hi)); +} +inline v_int64x4 v_dotprod_expand_fast(const v_int16x16& a, const v_int16x16& b, const v_int64x4& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +// 32 >> 64f +inline v_float64x4 v_dotprod_expand_fast(const v_int32x8& a, const v_int32x8& b) +{ return v_dotprod_expand(a, b); } +inline v_float64x4 v_dotprod_expand_fast(const v_int32x8& a, const v_int32x8& b, const v_float64x4& c) +{ return v_dotprod_expand(a, b, c); } + +#define OPENCV_HAL_AVX_SPLAT2_PS(a, im) \ + v_float32x8(_mm256_permute_ps(a.val, _MM_SHUFFLE(im, im, im, im))) + +inline v_float32x8 v_matmul(const v_float32x8& v, const v_float32x8& m0, + const v_float32x8& m1, const v_float32x8& m2, + const v_float32x8& m3) +{ + v_float32x8 v04 = OPENCV_HAL_AVX_SPLAT2_PS(v, 0); + v_float32x8 v15 = OPENCV_HAL_AVX_SPLAT2_PS(v, 1); + v_float32x8 v26 = OPENCV_HAL_AVX_SPLAT2_PS(v, 2); + v_float32x8 v37 = OPENCV_HAL_AVX_SPLAT2_PS(v, 3); + return v_fma(v04, m0, v_fma(v15, m1, v_fma(v26, m2, v37 * m3))); +} + +inline v_float32x8 v_matmuladd(const v_float32x8& v, const v_float32x8& m0, + const v_float32x8& m1, const v_float32x8& m2, + const v_float32x8& a) +{ + v_float32x8 v04 = OPENCV_HAL_AVX_SPLAT2_PS(v, 0); + v_float32x8 v15 = OPENCV_HAL_AVX_SPLAT2_PS(v, 1); + v_float32x8 v26 = OPENCV_HAL_AVX_SPLAT2_PS(v, 2); + return v_fma(v04, m0, v_fma(v15, m1, v_fma(v26, m2, a))); +} + +#define OPENCV_HAL_IMPL_AVX_TRANSPOSE4x4(_Tpvec, suffix, cast_from, cast_to) \ + inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, \ + const _Tpvec& a2, const _Tpvec& a3, \ + _Tpvec& b0, _Tpvec& b1, _Tpvec& b2, _Tpvec& b3) \ + { \ + __m256i t0 = cast_from(_mm256_unpacklo_##suffix(a0.val, a1.val)); \ + __m256i t1 = cast_from(_mm256_unpacklo_##suffix(a2.val, a3.val)); \ + __m256i t2 = cast_from(_mm256_unpackhi_##suffix(a0.val, a1.val)); \ + __m256i t3 = cast_from(_mm256_unpackhi_##suffix(a2.val, a3.val)); \ + b0.val = cast_to(_mm256_unpacklo_epi64(t0, t1)); \ + b1.val = cast_to(_mm256_unpackhi_epi64(t0, t1)); \ + b2.val = cast_to(_mm256_unpacklo_epi64(t2, t3)); \ + b3.val = cast_to(_mm256_unpackhi_epi64(t2, t3)); \ + } + +OPENCV_HAL_IMPL_AVX_TRANSPOSE4x4(v_uint32x8, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_AVX_TRANSPOSE4x4(v_int32x8, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_AVX_TRANSPOSE4x4(v_float32x8, ps, _mm256_castps_si256, _mm256_castsi256_ps) + +//////////////// Value reordering /////////////// + +/* Expand */ +#define OPENCV_HAL_IMPL_AVX_EXPAND(_Tpvec, _Tpwvec, _Tp, intrin) \ + inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ + { \ + b0.val = intrin(_v256_extract_low(a.val)); \ + b1.val = intrin(_v256_extract_high(a.val)); \ + } \ + inline _Tpwvec v_expand_low(const _Tpvec& a) \ + { return _Tpwvec(intrin(_v256_extract_low(a.val))); } \ + inline _Tpwvec v_expand_high(const _Tpvec& a) \ + { return _Tpwvec(intrin(_v256_extract_high(a.val))); } \ + inline _Tpwvec v256_load_expand(const _Tp* ptr) \ + { \ + __m128i a = _mm_loadu_si128((const __m128i*)ptr); \ + return _Tpwvec(intrin(a)); \ + } + +OPENCV_HAL_IMPL_AVX_EXPAND(v_uint8x32, v_uint16x16, uchar, _mm256_cvtepu8_epi16) +OPENCV_HAL_IMPL_AVX_EXPAND(v_int8x32, v_int16x16, schar, _mm256_cvtepi8_epi16) +OPENCV_HAL_IMPL_AVX_EXPAND(v_uint16x16, v_uint32x8, ushort, _mm256_cvtepu16_epi32) +OPENCV_HAL_IMPL_AVX_EXPAND(v_int16x16, v_int32x8, short, _mm256_cvtepi16_epi32) +OPENCV_HAL_IMPL_AVX_EXPAND(v_uint32x8, v_uint64x4, unsigned, _mm256_cvtepu32_epi64) +OPENCV_HAL_IMPL_AVX_EXPAND(v_int32x8, v_int64x4, int, _mm256_cvtepi32_epi64) + +#define OPENCV_HAL_IMPL_AVX_EXPAND_Q(_Tpvec, _Tp, intrin) \ + inline _Tpvec v256_load_expand_q(const _Tp* ptr) \ + { \ + __m128i a = _mm_loadl_epi64((const __m128i*)ptr); \ + return _Tpvec(intrin(a)); \ + } + +OPENCV_HAL_IMPL_AVX_EXPAND_Q(v_uint32x8, uchar, _mm256_cvtepu8_epi32) +OPENCV_HAL_IMPL_AVX_EXPAND_Q(v_int32x8, schar, _mm256_cvtepi8_epi32) + +/* pack */ +// 16 +inline v_int8x32 v_pack(const v_int16x16& a, const v_int16x16& b) +{ return v_int8x32(_v256_shuffle_odd_64(_mm256_packs_epi16(a.val, b.val))); } + +inline v_uint8x32 v_pack(const v_uint16x16& a, const v_uint16x16& b) +{ + __m256i t = _mm256_set1_epi16(255); + __m256i a1 = _mm256_min_epu16(a.val, t); + __m256i b1 = _mm256_min_epu16(b.val, t); + return v_uint8x32(_v256_shuffle_odd_64(_mm256_packus_epi16(a1, b1))); +} + +inline v_uint8x32 v_pack_u(const v_int16x16& a, const v_int16x16& b) +{ + return v_uint8x32(_v256_shuffle_odd_64(_mm256_packus_epi16(a.val, b.val))); +} + +inline void v_pack_store(schar* ptr, const v_int16x16& a) +{ v_store_low(ptr, v_pack(a, a)); } + +inline void v_pack_store(uchar* ptr, const v_uint16x16& a) +{ + const __m256i m = _mm256_set1_epi16(255); + __m256i am = _mm256_min_epu16(a.val, m); + am = _v256_shuffle_odd_64(_mm256_packus_epi16(am, am)); + v_store_low(ptr, v_uint8x32(am)); +} + +inline void v_pack_u_store(uchar* ptr, const v_int16x16& a) +{ v_store_low(ptr, v_pack_u(a, a)); } + +template inline +v_uint8x32 v_rshr_pack(const v_uint16x16& a, const v_uint16x16& b) +{ + // we assume that n > 0, and so the shifted 16-bit values can be treated as signed numbers. + v_uint16x16 delta = v256_setall_u16((short)(1 << (n-1))); + return v_pack_u(v_reinterpret_as_s16((a + delta) >> n), + v_reinterpret_as_s16((b + delta) >> n)); +} + +template inline +void v_rshr_pack_store(uchar* ptr, const v_uint16x16& a) +{ + v_uint16x16 delta = v256_setall_u16((short)(1 << (n-1))); + v_pack_u_store(ptr, v_reinterpret_as_s16((a + delta) >> n)); +} + +template inline +v_uint8x32 v_rshr_pack_u(const v_int16x16& a, const v_int16x16& b) +{ + v_int16x16 delta = v256_setall_s16((short)(1 << (n-1))); + return v_pack_u((a + delta) >> n, (b + delta) >> n); +} + +template inline +void v_rshr_pack_u_store(uchar* ptr, const v_int16x16& a) +{ + v_int16x16 delta = v256_setall_s16((short)(1 << (n-1))); + v_pack_u_store(ptr, (a + delta) >> n); +} + +template inline +v_int8x32 v_rshr_pack(const v_int16x16& a, const v_int16x16& b) +{ + v_int16x16 delta = v256_setall_s16((short)(1 << (n-1))); + return v_pack((a + delta) >> n, (b + delta) >> n); +} + +template inline +void v_rshr_pack_store(schar* ptr, const v_int16x16& a) +{ + v_int16x16 delta = v256_setall_s16((short)(1 << (n-1))); + v_pack_store(ptr, (a + delta) >> n); +} + +// 32 +inline v_int16x16 v_pack(const v_int32x8& a, const v_int32x8& b) +{ return v_int16x16(_v256_shuffle_odd_64(_mm256_packs_epi32(a.val, b.val))); } + +inline v_uint16x16 v_pack(const v_uint32x8& a, const v_uint32x8& b) +{ return v_uint16x16(_v256_shuffle_odd_64(_v256_packs_epu32(a.val, b.val))); } + +inline v_uint16x16 v_pack_u(const v_int32x8& a, const v_int32x8& b) +{ return v_uint16x16(_v256_shuffle_odd_64(_mm256_packus_epi32(a.val, b.val))); } + +inline void v_pack_store(short* ptr, const v_int32x8& a) +{ v_store_low(ptr, v_pack(a, a)); } + +inline void v_pack_store(ushort* ptr, const v_uint32x8& a) +{ + const __m256i m = _mm256_set1_epi32(65535); + __m256i am = _mm256_min_epu32(a.val, m); + am = _v256_shuffle_odd_64(_mm256_packus_epi32(am, am)); + v_store_low(ptr, v_uint16x16(am)); +} + +inline void v_pack_u_store(ushort* ptr, const v_int32x8& a) +{ v_store_low(ptr, v_pack_u(a, a)); } + + +template inline +v_uint16x16 v_rshr_pack(const v_uint32x8& a, const v_uint32x8& b) +{ + // we assume that n > 0, and so the shifted 32-bit values can be treated as signed numbers. + v_uint32x8 delta = v256_setall_u32(1 << (n-1)); + return v_pack_u(v_reinterpret_as_s32((a + delta) >> n), + v_reinterpret_as_s32((b + delta) >> n)); +} + +template inline +void v_rshr_pack_store(ushort* ptr, const v_uint32x8& a) +{ + v_uint32x8 delta = v256_setall_u32(1 << (n-1)); + v_pack_u_store(ptr, v_reinterpret_as_s32((a + delta) >> n)); +} + +template inline +v_uint16x16 v_rshr_pack_u(const v_int32x8& a, const v_int32x8& b) +{ + v_int32x8 delta = v256_setall_s32(1 << (n-1)); + return v_pack_u((a + delta) >> n, (b + delta) >> n); +} + +template inline +void v_rshr_pack_u_store(ushort* ptr, const v_int32x8& a) +{ + v_int32x8 delta = v256_setall_s32(1 << (n-1)); + v_pack_u_store(ptr, (a + delta) >> n); +} + +template inline +v_int16x16 v_rshr_pack(const v_int32x8& a, const v_int32x8& b) +{ + v_int32x8 delta = v256_setall_s32(1 << (n-1)); + return v_pack((a + delta) >> n, (b + delta) >> n); +} + +template inline +void v_rshr_pack_store(short* ptr, const v_int32x8& a) +{ + v_int32x8 delta = v256_setall_s32(1 << (n-1)); + v_pack_store(ptr, (a + delta) >> n); +} + +// 64 +// Non-saturating pack +inline v_uint32x8 v_pack(const v_uint64x4& a, const v_uint64x4& b) +{ + __m256i a0 = _mm256_shuffle_epi32(a.val, _MM_SHUFFLE(0, 0, 2, 0)); + __m256i b0 = _mm256_shuffle_epi32(b.val, _MM_SHUFFLE(0, 0, 2, 0)); + __m256i ab = _mm256_unpacklo_epi64(a0, b0); // a0, a1, b0, b1, a2, a3, b2, b3 + return v_uint32x8(_v256_shuffle_odd_64(ab)); +} + +inline v_int32x8 v_pack(const v_int64x4& a, const v_int64x4& b) +{ return v_reinterpret_as_s32(v_pack(v_reinterpret_as_u64(a), v_reinterpret_as_u64(b))); } + +inline void v_pack_store(unsigned* ptr, const v_uint64x4& a) +{ + __m256i a0 = _mm256_shuffle_epi32(a.val, _MM_SHUFFLE(0, 0, 2, 0)); + v_store_low(ptr, v_uint32x8(_v256_shuffle_odd_64(a0))); +} + +inline void v_pack_store(int* ptr, const v_int64x4& b) +{ v_pack_store((unsigned*)ptr, v_reinterpret_as_u64(b)); } + +template inline +v_uint32x8 v_rshr_pack(const v_uint64x4& a, const v_uint64x4& b) +{ + v_uint64x4 delta = v256_setall_u64((uint64)1 << (n-1)); + return v_pack((a + delta) >> n, (b + delta) >> n); +} + +template inline +void v_rshr_pack_store(unsigned* ptr, const v_uint64x4& a) +{ + v_uint64x4 delta = v256_setall_u64((uint64)1 << (n-1)); + v_pack_store(ptr, (a + delta) >> n); +} + +template inline +v_int32x8 v_rshr_pack(const v_int64x4& a, const v_int64x4& b) +{ + v_int64x4 delta = v256_setall_s64((int64)1 << (n-1)); + return v_pack((a + delta) >> n, (b + delta) >> n); +} + +template inline +void v_rshr_pack_store(int* ptr, const v_int64x4& a) +{ + v_int64x4 delta = v256_setall_s64((int64)1 << (n-1)); + v_pack_store(ptr, (a + delta) >> n); +} + +// pack boolean +inline v_uint8x32 v_pack_b(const v_uint16x16& a, const v_uint16x16& b) +{ + __m256i ab = _mm256_packs_epi16(a.val, b.val); + return v_uint8x32(_v256_shuffle_odd_64(ab)); +} + +inline v_uint8x32 v_pack_b(const v_uint32x8& a, const v_uint32x8& b, + const v_uint32x8& c, const v_uint32x8& d) +{ + __m256i ab = _mm256_packs_epi32(a.val, b.val); + __m256i cd = _mm256_packs_epi32(c.val, d.val); + + __m256i abcd = _v256_shuffle_odd_64(_mm256_packs_epi16(ab, cd)); + return v_uint8x32(_mm256_shuffle_epi32(abcd, _MM_SHUFFLE(3, 1, 2, 0))); +} + +inline v_uint8x32 v_pack_b(const v_uint64x4& a, const v_uint64x4& b, const v_uint64x4& c, + const v_uint64x4& d, const v_uint64x4& e, const v_uint64x4& f, + const v_uint64x4& g, const v_uint64x4& h) +{ + __m256i ab = _mm256_packs_epi32(a.val, b.val); + __m256i cd = _mm256_packs_epi32(c.val, d.val); + __m256i ef = _mm256_packs_epi32(e.val, f.val); + __m256i gh = _mm256_packs_epi32(g.val, h.val); + + __m256i abcd = _mm256_packs_epi32(ab, cd); + __m256i efgh = _mm256_packs_epi32(ef, gh); + __m256i pkall = _v256_shuffle_odd_64(_mm256_packs_epi16(abcd, efgh)); + + __m256i rev = _mm256_alignr_epi8(pkall, pkall, 8); + return v_uint8x32(_mm256_unpacklo_epi16(pkall, rev)); +} + +/* Recombine */ +// its up there with load and store operations + +/* Extract */ +#define OPENCV_HAL_IMPL_AVX_EXTRACT(_Tpvec) \ + template \ + inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \ + { return v_rotate_right(a, b); } + +OPENCV_HAL_IMPL_AVX_EXTRACT(v_uint8x32) +OPENCV_HAL_IMPL_AVX_EXTRACT(v_int8x32) +OPENCV_HAL_IMPL_AVX_EXTRACT(v_uint16x16) +OPENCV_HAL_IMPL_AVX_EXTRACT(v_int16x16) +OPENCV_HAL_IMPL_AVX_EXTRACT(v_uint32x8) +OPENCV_HAL_IMPL_AVX_EXTRACT(v_int32x8) +OPENCV_HAL_IMPL_AVX_EXTRACT(v_uint64x4) +OPENCV_HAL_IMPL_AVX_EXTRACT(v_int64x4) +OPENCV_HAL_IMPL_AVX_EXTRACT(v_float32x8) +OPENCV_HAL_IMPL_AVX_EXTRACT(v_float64x4) + +template +inline uchar v_extract_n(v_uint8x32 a) +{ + return (uchar)_v256_extract_epi8(a.val); +} + +template +inline schar v_extract_n(v_int8x32 a) +{ + return (schar)v_extract_n(v_reinterpret_as_u8(a)); +} + +template +inline ushort v_extract_n(v_uint16x16 a) +{ + return (ushort)_v256_extract_epi16(a.val); +} + +template +inline short v_extract_n(v_int16x16 a) +{ + return (short)v_extract_n(v_reinterpret_as_u16(a)); +} + +template +inline uint v_extract_n(v_uint32x8 a) +{ + return (uint)_v256_extract_epi32(a.val); +} + +template +inline int v_extract_n(v_int32x8 a) +{ + return (int)v_extract_n(v_reinterpret_as_u32(a)); +} + +template +inline uint64 v_extract_n(v_uint64x4 a) +{ + return (uint64)_v256_extract_epi64(a.val); +} + +template +inline int64 v_extract_n(v_int64x4 v) +{ + return (int64)v_extract_n(v_reinterpret_as_u64(v)); +} + +template +inline float v_extract_n(v_float32x8 v) +{ + union { uint iv; float fv; } d; + d.iv = v_extract_n(v_reinterpret_as_u32(v)); + return d.fv; +} + +template +inline double v_extract_n(v_float64x4 v) +{ + union { uint64 iv; double dv; } d; + d.iv = v_extract_n(v_reinterpret_as_u64(v)); + return d.dv; +} + +template +inline v_uint32x8 v_broadcast_element(v_uint32x8 a) +{ + static const __m256i perm = _mm256_set1_epi32((char)i); + return v_uint32x8(_mm256_permutevar8x32_epi32(a.val, perm)); +} + +template +inline v_int32x8 v_broadcast_element(const v_int32x8 &a) +{ return v_reinterpret_as_s32(v_broadcast_element(v_reinterpret_as_u32(a))); } + +template +inline v_float32x8 v_broadcast_element(const v_float32x8 &a) +{ return v_reinterpret_as_f32(v_broadcast_element(v_reinterpret_as_u32(a))); } + + +///////////////////// load deinterleave ///////////////////////////// + +inline void v_load_deinterleave( const uchar* ptr, v_uint8x32& a, v_uint8x32& b ) +{ + __m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 32)); + + const __m256i sh = _mm256_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15, + 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15); + __m256i p0 = _mm256_shuffle_epi8(ab0, sh); + __m256i p1 = _mm256_shuffle_epi8(ab1, sh); + __m256i pl = _mm256_permute2x128_si256(p0, p1, 0 + 2*16); + __m256i ph = _mm256_permute2x128_si256(p0, p1, 1 + 3*16); + __m256i a0 = _mm256_unpacklo_epi64(pl, ph); + __m256i b0 = _mm256_unpackhi_epi64(pl, ph); + a = v_uint8x32(a0); + b = v_uint8x32(b0); +} + +inline void v_load_deinterleave( const ushort* ptr, v_uint16x16& a, v_uint16x16& b ) +{ + __m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 16)); + + const __m256i sh = _mm256_setr_epi8(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15, + 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15); + __m256i p0 = _mm256_shuffle_epi8(ab0, sh); + __m256i p1 = _mm256_shuffle_epi8(ab1, sh); + __m256i pl = _mm256_permute2x128_si256(p0, p1, 0 + 2*16); + __m256i ph = _mm256_permute2x128_si256(p0, p1, 1 + 3*16); + __m256i a0 = _mm256_unpacklo_epi64(pl, ph); + __m256i b0 = _mm256_unpackhi_epi64(pl, ph); + a = v_uint16x16(a0); + b = v_uint16x16(b0); +} + +inline void v_load_deinterleave( const unsigned* ptr, v_uint32x8& a, v_uint32x8& b ) +{ + __m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 8)); + + const int sh = 0+2*4+1*16+3*64; + __m256i p0 = _mm256_shuffle_epi32(ab0, sh); + __m256i p1 = _mm256_shuffle_epi32(ab1, sh); + __m256i pl = _mm256_permute2x128_si256(p0, p1, 0 + 2*16); + __m256i ph = _mm256_permute2x128_si256(p0, p1, 1 + 3*16); + __m256i a0 = _mm256_unpacklo_epi64(pl, ph); + __m256i b0 = _mm256_unpackhi_epi64(pl, ph); + a = v_uint32x8(a0); + b = v_uint32x8(b0); +} + +inline void v_load_deinterleave( const uint64* ptr, v_uint64x4& a, v_uint64x4& b ) +{ + __m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 4)); + + __m256i pl = _mm256_permute2x128_si256(ab0, ab1, 0 + 2*16); + __m256i ph = _mm256_permute2x128_si256(ab0, ab1, 1 + 3*16); + __m256i a0 = _mm256_unpacklo_epi64(pl, ph); + __m256i b0 = _mm256_unpackhi_epi64(pl, ph); + a = v_uint64x4(a0); + b = v_uint64x4(b0); +} + +inline void v_load_deinterleave( const uchar* ptr, v_uint8x32& a, v_uint8x32& b, v_uint8x32& c ) +{ + __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 32)); + __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 64)); + + __m256i s02_low = _mm256_permute2x128_si256(bgr0, bgr2, 0 + 2*16); + __m256i s02_high = _mm256_permute2x128_si256(bgr0, bgr2, 1 + 3*16); + + const __m256i m0 = _mm256_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, + 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0); + const __m256i m1 = _mm256_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, + -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1); + + __m256i b0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_low, s02_high, m0), bgr1, m1); + __m256i g0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_high, s02_low, m1), bgr1, m0); + __m256i r0 = _mm256_blendv_epi8(_mm256_blendv_epi8(bgr1, s02_low, m0), s02_high, m1); + + const __m256i + sh_b = _mm256_setr_epi8(0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13, + 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13), + sh_g = _mm256_setr_epi8(1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, + 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14), + sh_r = _mm256_setr_epi8(2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, + 2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15); + b0 = _mm256_shuffle_epi8(b0, sh_b); + g0 = _mm256_shuffle_epi8(g0, sh_g); + r0 = _mm256_shuffle_epi8(r0, sh_r); + + a = v_uint8x32(b0); + b = v_uint8x32(g0); + c = v_uint8x32(r0); +} + +inline void v_load_deinterleave( const ushort* ptr, v_uint16x16& a, v_uint16x16& b, v_uint16x16& c ) +{ + __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 16)); + __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 32)); + + __m256i s02_low = _mm256_permute2x128_si256(bgr0, bgr2, 0 + 2*16); + __m256i s02_high = _mm256_permute2x128_si256(bgr0, bgr2, 1 + 3*16); + + const __m256i m0 = _mm256_setr_epi8(0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, + 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0); + const __m256i m1 = _mm256_setr_epi8(0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, + -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0); + __m256i b0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_low, s02_high, m0), bgr1, m1); + __m256i g0 = _mm256_blendv_epi8(_mm256_blendv_epi8(bgr1, s02_low, m0), s02_high, m1); + __m256i r0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_high, s02_low, m1), bgr1, m0); + const __m256i sh_b = _mm256_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11, + 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11); + const __m256i sh_g = _mm256_setr_epi8(2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13, + 2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13); + const __m256i sh_r = _mm256_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, + 4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15); + b0 = _mm256_shuffle_epi8(b0, sh_b); + g0 = _mm256_shuffle_epi8(g0, sh_g); + r0 = _mm256_shuffle_epi8(r0, sh_r); + + a = v_uint16x16(b0); + b = v_uint16x16(g0); + c = v_uint16x16(r0); +} + +inline void v_load_deinterleave( const unsigned* ptr, v_uint32x8& a, v_uint32x8& b, v_uint32x8& c ) +{ + __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 8)); + __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 16)); + + __m256i s02_low = _mm256_permute2x128_si256(bgr0, bgr2, 0 + 2*16); + __m256i s02_high = _mm256_permute2x128_si256(bgr0, bgr2, 1 + 3*16); + + __m256i b0 = _mm256_blend_epi32(_mm256_blend_epi32(s02_low, s02_high, 0x24), bgr1, 0x92); + __m256i g0 = _mm256_blend_epi32(_mm256_blend_epi32(s02_high, s02_low, 0x92), bgr1, 0x24); + __m256i r0 = _mm256_blend_epi32(_mm256_blend_epi32(bgr1, s02_low, 0x24), s02_high, 0x92); + + b0 = _mm256_shuffle_epi32(b0, 0x6c); + g0 = _mm256_shuffle_epi32(g0, 0xb1); + r0 = _mm256_shuffle_epi32(r0, 0xc6); + + a = v_uint32x8(b0); + b = v_uint32x8(g0); + c = v_uint32x8(r0); +} + +inline void v_load_deinterleave( const uint64* ptr, v_uint64x4& a, v_uint64x4& b, v_uint64x4& c ) +{ + __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 4)); + __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 8)); + + __m256i s01 = _mm256_blend_epi32(bgr0, bgr1, 0xf0); + __m256i s12 = _mm256_blend_epi32(bgr1, bgr2, 0xf0); + __m256i s20r = _mm256_permute4x64_epi64(_mm256_blend_epi32(bgr2, bgr0, 0xf0), 0x1b); + __m256i b0 = _mm256_unpacklo_epi64(s01, s20r); + __m256i g0 = _mm256_alignr_epi8(s12, s01, 8); + __m256i r0 = _mm256_unpackhi_epi64(s20r, s12); + + a = v_uint64x4(b0); + b = v_uint64x4(g0); + c = v_uint64x4(r0); +} + +inline void v_load_deinterleave( const uchar* ptr, v_uint8x32& a, v_uint8x32& b, v_uint8x32& c, v_uint8x32& d ) +{ + __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 32)); + __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 64)); + __m256i bgr3 = _mm256_loadu_si256((const __m256i*)(ptr + 96)); + const __m256i sh = _mm256_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, + 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15); + + __m256i p0 = _mm256_shuffle_epi8(bgr0, sh); + __m256i p1 = _mm256_shuffle_epi8(bgr1, sh); + __m256i p2 = _mm256_shuffle_epi8(bgr2, sh); + __m256i p3 = _mm256_shuffle_epi8(bgr3, sh); + + __m256i p01l = _mm256_unpacklo_epi32(p0, p1); + __m256i p01h = _mm256_unpackhi_epi32(p0, p1); + __m256i p23l = _mm256_unpacklo_epi32(p2, p3); + __m256i p23h = _mm256_unpackhi_epi32(p2, p3); + + __m256i pll = _mm256_permute2x128_si256(p01l, p23l, 0 + 2*16); + __m256i plh = _mm256_permute2x128_si256(p01l, p23l, 1 + 3*16); + __m256i phl = _mm256_permute2x128_si256(p01h, p23h, 0 + 2*16); + __m256i phh = _mm256_permute2x128_si256(p01h, p23h, 1 + 3*16); + + __m256i b0 = _mm256_unpacklo_epi32(pll, plh); + __m256i g0 = _mm256_unpackhi_epi32(pll, plh); + __m256i r0 = _mm256_unpacklo_epi32(phl, phh); + __m256i a0 = _mm256_unpackhi_epi32(phl, phh); + + a = v_uint8x32(b0); + b = v_uint8x32(g0); + c = v_uint8x32(r0); + d = v_uint8x32(a0); +} + +inline void v_load_deinterleave( const ushort* ptr, v_uint16x16& a, v_uint16x16& b, v_uint16x16& c, v_uint16x16& d ) +{ + __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 16)); + __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 32)); + __m256i bgr3 = _mm256_loadu_si256((const __m256i*)(ptr + 48)); + const __m256i sh = _mm256_setr_epi8(0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15, + 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15); + __m256i p0 = _mm256_shuffle_epi8(bgr0, sh); + __m256i p1 = _mm256_shuffle_epi8(bgr1, sh); + __m256i p2 = _mm256_shuffle_epi8(bgr2, sh); + __m256i p3 = _mm256_shuffle_epi8(bgr3, sh); + + __m256i p01l = _mm256_unpacklo_epi32(p0, p1); + __m256i p01h = _mm256_unpackhi_epi32(p0, p1); + __m256i p23l = _mm256_unpacklo_epi32(p2, p3); + __m256i p23h = _mm256_unpackhi_epi32(p2, p3); + + __m256i pll = _mm256_permute2x128_si256(p01l, p23l, 0 + 2*16); + __m256i plh = _mm256_permute2x128_si256(p01l, p23l, 1 + 3*16); + __m256i phl = _mm256_permute2x128_si256(p01h, p23h, 0 + 2*16); + __m256i phh = _mm256_permute2x128_si256(p01h, p23h, 1 + 3*16); + + __m256i b0 = _mm256_unpacklo_epi32(pll, plh); + __m256i g0 = _mm256_unpackhi_epi32(pll, plh); + __m256i r0 = _mm256_unpacklo_epi32(phl, phh); + __m256i a0 = _mm256_unpackhi_epi32(phl, phh); + + a = v_uint16x16(b0); + b = v_uint16x16(g0); + c = v_uint16x16(r0); + d = v_uint16x16(a0); +} + +inline void v_load_deinterleave( const unsigned* ptr, v_uint32x8& a, v_uint32x8& b, v_uint32x8& c, v_uint32x8& d ) +{ + __m256i p0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i p1 = _mm256_loadu_si256((const __m256i*)(ptr + 8)); + __m256i p2 = _mm256_loadu_si256((const __m256i*)(ptr + 16)); + __m256i p3 = _mm256_loadu_si256((const __m256i*)(ptr + 24)); + + __m256i p01l = _mm256_unpacklo_epi32(p0, p1); + __m256i p01h = _mm256_unpackhi_epi32(p0, p1); + __m256i p23l = _mm256_unpacklo_epi32(p2, p3); + __m256i p23h = _mm256_unpackhi_epi32(p2, p3); + + __m256i pll = _mm256_permute2x128_si256(p01l, p23l, 0 + 2*16); + __m256i plh = _mm256_permute2x128_si256(p01l, p23l, 1 + 3*16); + __m256i phl = _mm256_permute2x128_si256(p01h, p23h, 0 + 2*16); + __m256i phh = _mm256_permute2x128_si256(p01h, p23h, 1 + 3*16); + + __m256i b0 = _mm256_unpacklo_epi32(pll, plh); + __m256i g0 = _mm256_unpackhi_epi32(pll, plh); + __m256i r0 = _mm256_unpacklo_epi32(phl, phh); + __m256i a0 = _mm256_unpackhi_epi32(phl, phh); + + a = v_uint32x8(b0); + b = v_uint32x8(g0); + c = v_uint32x8(r0); + d = v_uint32x8(a0); +} + +inline void v_load_deinterleave( const uint64* ptr, v_uint64x4& a, v_uint64x4& b, v_uint64x4& c, v_uint64x4& d ) +{ + __m256i bgra0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i bgra1 = _mm256_loadu_si256((const __m256i*)(ptr + 4)); + __m256i bgra2 = _mm256_loadu_si256((const __m256i*)(ptr + 8)); + __m256i bgra3 = _mm256_loadu_si256((const __m256i*)(ptr + 12)); + + __m256i l02 = _mm256_permute2x128_si256(bgra0, bgra2, 0 + 2*16); + __m256i h02 = _mm256_permute2x128_si256(bgra0, bgra2, 1 + 3*16); + __m256i l13 = _mm256_permute2x128_si256(bgra1, bgra3, 0 + 2*16); + __m256i h13 = _mm256_permute2x128_si256(bgra1, bgra3, 1 + 3*16); + + __m256i b0 = _mm256_unpacklo_epi64(l02, l13); + __m256i g0 = _mm256_unpackhi_epi64(l02, l13); + __m256i r0 = _mm256_unpacklo_epi64(h02, h13); + __m256i a0 = _mm256_unpackhi_epi64(h02, h13); + + a = v_uint64x4(b0); + b = v_uint64x4(g0); + c = v_uint64x4(r0); + d = v_uint64x4(a0); +} + +///////////////////////////// store interleave ///////////////////////////////////// + +inline void v_store_interleave( uchar* ptr, const v_uint8x32& x, const v_uint8x32& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i xy_l = _mm256_unpacklo_epi8(x.val, y.val); + __m256i xy_h = _mm256_unpackhi_epi8(x.val, y.val); + + __m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16); + __m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, xy0); + _mm256_stream_si256((__m256i*)(ptr + 32), xy1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, xy0); + _mm256_store_si256((__m256i*)(ptr + 32), xy1); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, xy0); + _mm256_storeu_si256((__m256i*)(ptr + 32), xy1); + } +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x16& x, const v_uint16x16& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i xy_l = _mm256_unpacklo_epi16(x.val, y.val); + __m256i xy_h = _mm256_unpackhi_epi16(x.val, y.val); + + __m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16); + __m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, xy0); + _mm256_stream_si256((__m256i*)(ptr + 16), xy1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, xy0); + _mm256_store_si256((__m256i*)(ptr + 16), xy1); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, xy0); + _mm256_storeu_si256((__m256i*)(ptr + 16), xy1); + } +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x8& x, const v_uint32x8& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i xy_l = _mm256_unpacklo_epi32(x.val, y.val); + __m256i xy_h = _mm256_unpackhi_epi32(x.val, y.val); + + __m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16); + __m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, xy0); + _mm256_stream_si256((__m256i*)(ptr + 8), xy1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, xy0); + _mm256_store_si256((__m256i*)(ptr + 8), xy1); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, xy0); + _mm256_storeu_si256((__m256i*)(ptr + 8), xy1); + } +} + +inline void v_store_interleave( uint64* ptr, const v_uint64x4& x, const v_uint64x4& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i xy_l = _mm256_unpacklo_epi64(x.val, y.val); + __m256i xy_h = _mm256_unpackhi_epi64(x.val, y.val); + + __m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16); + __m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, xy0); + _mm256_stream_si256((__m256i*)(ptr + 4), xy1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, xy0); + _mm256_store_si256((__m256i*)(ptr + 4), xy1); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, xy0); + _mm256_storeu_si256((__m256i*)(ptr + 4), xy1); + } +} + +inline void v_store_interleave( uchar* ptr, const v_uint8x32& a, const v_uint8x32& b, const v_uint8x32& c, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + const __m256i sh_b = _mm256_setr_epi8( + 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5, + 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5); + const __m256i sh_g = _mm256_setr_epi8( + 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, + 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10); + const __m256i sh_r = _mm256_setr_epi8( + 10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, + 10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15); + + __m256i b0 = _mm256_shuffle_epi8(a.val, sh_b); + __m256i g0 = _mm256_shuffle_epi8(b.val, sh_g); + __m256i r0 = _mm256_shuffle_epi8(c.val, sh_r); + + const __m256i m0 = _mm256_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, + 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0); + const __m256i m1 = _mm256_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, + 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0); + + __m256i p0 = _mm256_blendv_epi8(_mm256_blendv_epi8(b0, g0, m0), r0, m1); + __m256i p1 = _mm256_blendv_epi8(_mm256_blendv_epi8(g0, r0, m0), b0, m1); + __m256i p2 = _mm256_blendv_epi8(_mm256_blendv_epi8(r0, b0, m0), g0, m1); + + __m256i bgr0 = _mm256_permute2x128_si256(p0, p1, 0 + 2*16); + __m256i bgr1 = _mm256_permute2x128_si256(p2, p0, 0 + 3*16); + __m256i bgr2 = _mm256_permute2x128_si256(p1, p2, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgr0); + _mm256_stream_si256((__m256i*)(ptr + 32), bgr1); + _mm256_stream_si256((__m256i*)(ptr + 64), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgr0); + _mm256_store_si256((__m256i*)(ptr + 32), bgr1); + _mm256_store_si256((__m256i*)(ptr + 64), bgr2); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgr0); + _mm256_storeu_si256((__m256i*)(ptr + 32), bgr1); + _mm256_storeu_si256((__m256i*)(ptr + 64), bgr2); + } +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x16& a, const v_uint16x16& b, const v_uint16x16& c, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + const __m256i sh_b = _mm256_setr_epi8( + 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11, + 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11); + const __m256i sh_g = _mm256_setr_epi8( + 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, + 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5); + const __m256i sh_r = _mm256_setr_epi8( + 4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, + 4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15); + + __m256i b0 = _mm256_shuffle_epi8(a.val, sh_b); + __m256i g0 = _mm256_shuffle_epi8(b.val, sh_g); + __m256i r0 = _mm256_shuffle_epi8(c.val, sh_r); + + const __m256i m0 = _mm256_setr_epi8(0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, + 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0); + const __m256i m1 = _mm256_setr_epi8(0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, + -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0); + + __m256i p0 = _mm256_blendv_epi8(_mm256_blendv_epi8(b0, g0, m0), r0, m1); + __m256i p1 = _mm256_blendv_epi8(_mm256_blendv_epi8(g0, r0, m0), b0, m1); + __m256i p2 = _mm256_blendv_epi8(_mm256_blendv_epi8(r0, b0, m0), g0, m1); + + __m256i bgr0 = _mm256_permute2x128_si256(p0, p2, 0 + 2*16); + //__m256i bgr1 = p1; + __m256i bgr2 = _mm256_permute2x128_si256(p0, p2, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgr0); + _mm256_stream_si256((__m256i*)(ptr + 16), p1); + _mm256_stream_si256((__m256i*)(ptr + 32), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgr0); + _mm256_store_si256((__m256i*)(ptr + 16), p1); + _mm256_store_si256((__m256i*)(ptr + 32), bgr2); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgr0); + _mm256_storeu_si256((__m256i*)(ptr + 16), p1); + _mm256_storeu_si256((__m256i*)(ptr + 32), bgr2); + } +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x8& a, const v_uint32x8& b, const v_uint32x8& c, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i b0 = _mm256_shuffle_epi32(a.val, 0x6c); + __m256i g0 = _mm256_shuffle_epi32(b.val, 0xb1); + __m256i r0 = _mm256_shuffle_epi32(c.val, 0xc6); + + __m256i p0 = _mm256_blend_epi32(_mm256_blend_epi32(b0, g0, 0x92), r0, 0x24); + __m256i p1 = _mm256_blend_epi32(_mm256_blend_epi32(g0, r0, 0x92), b0, 0x24); + __m256i p2 = _mm256_blend_epi32(_mm256_blend_epi32(r0, b0, 0x92), g0, 0x24); + + __m256i bgr0 = _mm256_permute2x128_si256(p0, p1, 0 + 2*16); + //__m256i bgr1 = p2; + __m256i bgr2 = _mm256_permute2x128_si256(p0, p1, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgr0); + _mm256_stream_si256((__m256i*)(ptr + 8), p2); + _mm256_stream_si256((__m256i*)(ptr + 16), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgr0); + _mm256_store_si256((__m256i*)(ptr + 8), p2); + _mm256_store_si256((__m256i*)(ptr + 16), bgr2); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgr0); + _mm256_storeu_si256((__m256i*)(ptr + 8), p2); + _mm256_storeu_si256((__m256i*)(ptr + 16), bgr2); + } +} + +inline void v_store_interleave( uint64* ptr, const v_uint64x4& a, const v_uint64x4& b, const v_uint64x4& c, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i s01 = _mm256_unpacklo_epi64(a.val, b.val); + __m256i s12 = _mm256_unpackhi_epi64(b.val, c.val); + __m256i s20 = _mm256_blend_epi32(c.val, a.val, 0xcc); + + __m256i bgr0 = _mm256_permute2x128_si256(s01, s20, 0 + 2*16); + __m256i bgr1 = _mm256_blend_epi32(s01, s12, 0x0f); + __m256i bgr2 = _mm256_permute2x128_si256(s20, s12, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgr0); + _mm256_stream_si256((__m256i*)(ptr + 4), bgr1); + _mm256_stream_si256((__m256i*)(ptr + 8), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgr0); + _mm256_store_si256((__m256i*)(ptr + 4), bgr1); + _mm256_store_si256((__m256i*)(ptr + 8), bgr2); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgr0); + _mm256_storeu_si256((__m256i*)(ptr + 4), bgr1); + _mm256_storeu_si256((__m256i*)(ptr + 8), bgr2); + } +} + +inline void v_store_interleave( uchar* ptr, const v_uint8x32& a, const v_uint8x32& b, + const v_uint8x32& c, const v_uint8x32& d, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i bg0 = _mm256_unpacklo_epi8(a.val, b.val); + __m256i bg1 = _mm256_unpackhi_epi8(a.val, b.val); + __m256i ra0 = _mm256_unpacklo_epi8(c.val, d.val); + __m256i ra1 = _mm256_unpackhi_epi8(c.val, d.val); + + __m256i bgra0_ = _mm256_unpacklo_epi16(bg0, ra0); + __m256i bgra1_ = _mm256_unpackhi_epi16(bg0, ra0); + __m256i bgra2_ = _mm256_unpacklo_epi16(bg1, ra1); + __m256i bgra3_ = _mm256_unpackhi_epi16(bg1, ra1); + + __m256i bgra0 = _mm256_permute2x128_si256(bgra0_, bgra1_, 0 + 2*16); + __m256i bgra2 = _mm256_permute2x128_si256(bgra0_, bgra1_, 1 + 3*16); + __m256i bgra1 = _mm256_permute2x128_si256(bgra2_, bgra3_, 0 + 2*16); + __m256i bgra3 = _mm256_permute2x128_si256(bgra2_, bgra3_, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgra0); + _mm256_stream_si256((__m256i*)(ptr + 32), bgra1); + _mm256_stream_si256((__m256i*)(ptr + 64), bgra2); + _mm256_stream_si256((__m256i*)(ptr + 96), bgra3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgra0); + _mm256_store_si256((__m256i*)(ptr + 32), bgra1); + _mm256_store_si256((__m256i*)(ptr + 64), bgra2); + _mm256_store_si256((__m256i*)(ptr + 96), bgra3); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgra0); + _mm256_storeu_si256((__m256i*)(ptr + 32), bgra1); + _mm256_storeu_si256((__m256i*)(ptr + 64), bgra2); + _mm256_storeu_si256((__m256i*)(ptr + 96), bgra3); + } +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x16& a, const v_uint16x16& b, + const v_uint16x16& c, const v_uint16x16& d, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i bg0 = _mm256_unpacklo_epi16(a.val, b.val); + __m256i bg1 = _mm256_unpackhi_epi16(a.val, b.val); + __m256i ra0 = _mm256_unpacklo_epi16(c.val, d.val); + __m256i ra1 = _mm256_unpackhi_epi16(c.val, d.val); + + __m256i bgra0_ = _mm256_unpacklo_epi32(bg0, ra0); + __m256i bgra1_ = _mm256_unpackhi_epi32(bg0, ra0); + __m256i bgra2_ = _mm256_unpacklo_epi32(bg1, ra1); + __m256i bgra3_ = _mm256_unpackhi_epi32(bg1, ra1); + + __m256i bgra0 = _mm256_permute2x128_si256(bgra0_, bgra1_, 0 + 2*16); + __m256i bgra2 = _mm256_permute2x128_si256(bgra0_, bgra1_, 1 + 3*16); + __m256i bgra1 = _mm256_permute2x128_si256(bgra2_, bgra3_, 0 + 2*16); + __m256i bgra3 = _mm256_permute2x128_si256(bgra2_, bgra3_, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgra0); + _mm256_stream_si256((__m256i*)(ptr + 16), bgra1); + _mm256_stream_si256((__m256i*)(ptr + 32), bgra2); + _mm256_stream_si256((__m256i*)(ptr + 48), bgra3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgra0); + _mm256_store_si256((__m256i*)(ptr + 16), bgra1); + _mm256_store_si256((__m256i*)(ptr + 32), bgra2); + _mm256_store_si256((__m256i*)(ptr + 48), bgra3); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgra0); + _mm256_storeu_si256((__m256i*)(ptr + 16), bgra1); + _mm256_storeu_si256((__m256i*)(ptr + 32), bgra2); + _mm256_storeu_si256((__m256i*)(ptr + 48), bgra3); + } +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x8& a, const v_uint32x8& b, + const v_uint32x8& c, const v_uint32x8& d, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i bg0 = _mm256_unpacklo_epi32(a.val, b.val); + __m256i bg1 = _mm256_unpackhi_epi32(a.val, b.val); + __m256i ra0 = _mm256_unpacklo_epi32(c.val, d.val); + __m256i ra1 = _mm256_unpackhi_epi32(c.val, d.val); + + __m256i bgra0_ = _mm256_unpacklo_epi64(bg0, ra0); + __m256i bgra1_ = _mm256_unpackhi_epi64(bg0, ra0); + __m256i bgra2_ = _mm256_unpacklo_epi64(bg1, ra1); + __m256i bgra3_ = _mm256_unpackhi_epi64(bg1, ra1); + + __m256i bgra0 = _mm256_permute2x128_si256(bgra0_, bgra1_, 0 + 2*16); + __m256i bgra2 = _mm256_permute2x128_si256(bgra0_, bgra1_, 1 + 3*16); + __m256i bgra1 = _mm256_permute2x128_si256(bgra2_, bgra3_, 0 + 2*16); + __m256i bgra3 = _mm256_permute2x128_si256(bgra2_, bgra3_, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgra0); + _mm256_stream_si256((__m256i*)(ptr + 8), bgra1); + _mm256_stream_si256((__m256i*)(ptr + 16), bgra2); + _mm256_stream_si256((__m256i*)(ptr + 24), bgra3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgra0); + _mm256_store_si256((__m256i*)(ptr + 8), bgra1); + _mm256_store_si256((__m256i*)(ptr + 16), bgra2); + _mm256_store_si256((__m256i*)(ptr + 24), bgra3); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgra0); + _mm256_storeu_si256((__m256i*)(ptr + 8), bgra1); + _mm256_storeu_si256((__m256i*)(ptr + 16), bgra2); + _mm256_storeu_si256((__m256i*)(ptr + 24), bgra3); + } +} + +inline void v_store_interleave( uint64* ptr, const v_uint64x4& a, const v_uint64x4& b, + const v_uint64x4& c, const v_uint64x4& d, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i bg0 = _mm256_unpacklo_epi64(a.val, b.val); + __m256i bg1 = _mm256_unpackhi_epi64(a.val, b.val); + __m256i ra0 = _mm256_unpacklo_epi64(c.val, d.val); + __m256i ra1 = _mm256_unpackhi_epi64(c.val, d.val); + + __m256i bgra0 = _mm256_permute2x128_si256(bg0, ra0, 0 + 2*16); + __m256i bgra1 = _mm256_permute2x128_si256(bg1, ra1, 0 + 2*16); + __m256i bgra2 = _mm256_permute2x128_si256(bg0, ra0, 1 + 3*16); + __m256i bgra3 = _mm256_permute2x128_si256(bg1, ra1, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgra0); + _mm256_stream_si256((__m256i*)(ptr + 4), bgra1); + _mm256_stream_si256((__m256i*)(ptr + 8), bgra2); + _mm256_stream_si256((__m256i*)(ptr + 12), bgra3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgra0); + _mm256_store_si256((__m256i*)(ptr + 4), bgra1); + _mm256_store_si256((__m256i*)(ptr + 8), bgra2); + _mm256_store_si256((__m256i*)(ptr + 12), bgra3); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgra0); + _mm256_storeu_si256((__m256i*)(ptr + 4), bgra1); + _mm256_storeu_si256((__m256i*)(ptr + 8), bgra2); + _mm256_storeu_si256((__m256i*)(ptr + 12), bgra3); + } +} + +#define OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1) \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0 ) \ +{ \ + _Tpvec1 a1, b1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ +} \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0 ) \ +{ \ + _Tpvec1 a1, b1, c1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ + c0 = v_reinterpret_as_##suffix0(c1); \ +} \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0, _Tpvec0& d0 ) \ +{ \ + _Tpvec1 a1, b1, c1, d1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1, d1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ + c0 = v_reinterpret_as_##suffix0(c1); \ + d0 = v_reinterpret_as_##suffix0(d1); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + hal::StoreMode mode=hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, mode); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, const _Tpvec0& c0, \ + hal::StoreMode mode=hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, mode); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + const _Tpvec0& c0, const _Tpvec0& d0, \ + hal::StoreMode mode=hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ + _Tpvec1 d1 = v_reinterpret_as_##suffix1(d0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1, mode); \ +} + +OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int8x32, schar, s8, v_uint8x32, uchar, u8) +OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int16x16, short, s16, v_uint16x16, ushort, u16) +OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int32x8, int, s32, v_uint32x8, unsigned, u32) +OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_float32x8, float, f32, v_uint32x8, unsigned, u32) +OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int64x4, int64, s64, v_uint64x4, uint64, u64) +OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_float64x4, double, f64, v_uint64x4, uint64, u64) + +// FP16 +inline v_float32x8 v256_load_expand(const float16_t* ptr) +{ + return v_float32x8(_mm256_cvtph_ps(_mm_loadu_si128((const __m128i*)ptr))); +} + +inline void v_pack_store(float16_t* ptr, const v_float32x8& a) +{ + __m128i ah = _mm256_cvtps_ph(a.val, 0); + _mm_storeu_si128((__m128i*)ptr, ah); +} + +inline void v256_cleanup() { _mm256_zeroall(); } + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END + +//! @endcond + +} // cv:: + +#endif // OPENCV_HAL_INTRIN_AVX_HPP diff --git a/IPL/include/opencv/opencv2/core/hal/intrin_avx512.hpp b/IPL/include/opencv/opencv2/core/hal/intrin_avx512.hpp new file mode 100644 index 0000000..e189582 --- /dev/null +++ b/IPL/include/opencv/opencv2/core/hal/intrin_avx512.hpp @@ -0,0 +1,3049 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#ifndef OPENCV_HAL_INTRIN_AVX512_HPP +#define OPENCV_HAL_INTRIN_AVX512_HPP + +#if defined(_MSC_VER) && (_MSC_VER < 1920/*MSVS2019*/) +# pragma warning(disable:4146) // unary minus operator applied to unsigned type, result still unsigned +# pragma warning(disable:4309) // 'argument': truncation of constant value +# pragma warning(disable:4310) // cast truncates constant value +#endif + +#define CVT_ROUND_MODES_IMPLEMENTED 0 + +#define CV_SIMD512 1 +#define CV_SIMD512_64F 1 +#define CV_SIMD512_FP16 0 // no native operations with FP16 type. Only load/store from float32x8 are available (if CV_FP16 == 1) + +#define _v512_set_epu64(a7, a6, a5, a4, a3, a2, a1, a0) _mm512_set_epi64((int64)(a7),(int64)(a6),(int64)(a5),(int64)(a4),(int64)(a3),(int64)(a2),(int64)(a1),(int64)(a0)) +#define _v512_set_epu32(a15, a14, a13, a12, a11, a10, a9, a8, a7, a6, a5, a4, a3, a2, a1, a0) \ + _mm512_set_epi64(((int64)(a15)<<32)|(int64)(a14), ((int64)(a13)<<32)|(int64)(a12), ((int64)(a11)<<32)|(int64)(a10), ((int64)( a9)<<32)|(int64)( a8), \ + ((int64)( a7)<<32)|(int64)( a6), ((int64)( a5)<<32)|(int64)( a4), ((int64)( a3)<<32)|(int64)( a2), ((int64)( a1)<<32)|(int64)( a0)) +#define _v512_set_epu16(a31, a30, a29, a28, a27, a26, a25, a24, a23, a22, a21, a20, a19, a18, a17, a16, \ + a15, a14, a13, a12, a11, a10, a9, a8, a7, a6, a5, a4, a3, a2, a1, a0) \ + _v512_set_epu32(((unsigned)(a31)<<16)|(unsigned)(a30), ((unsigned)(a29)<<16)|(unsigned)(a28), ((unsigned)(a27)<<16)|(unsigned)(a26), ((unsigned)(a25)<<16)|(unsigned)(a24), \ + ((unsigned)(a23)<<16)|(unsigned)(a22), ((unsigned)(a21)<<16)|(unsigned)(a20), ((unsigned)(a19)<<16)|(unsigned)(a18), ((unsigned)(a17)<<16)|(unsigned)(a16), \ + ((unsigned)(a15)<<16)|(unsigned)(a14), ((unsigned)(a13)<<16)|(unsigned)(a12), ((unsigned)(a11)<<16)|(unsigned)(a10), ((unsigned)( a9)<<16)|(unsigned)( a8), \ + ((unsigned)( a7)<<16)|(unsigned)( a6), ((unsigned)( a5)<<16)|(unsigned)( a4), ((unsigned)( a3)<<16)|(unsigned)( a2), ((unsigned)( a1)<<16)|(unsigned)( a0)) +#define _v512_set_epu8(a63, a62, a61, a60, a59, a58, a57, a56, a55, a54, a53, a52, a51, a50, a49, a48, \ + a47, a46, a45, a44, a43, a42, a41, a40, a39, a38, a37, a36, a35, a34, a33, a32, \ + a31, a30, a29, a28, a27, a26, a25, a24, a23, a22, a21, a20, a19, a18, a17, a16, \ + a15, a14, a13, a12, a11, a10, a9, a8, a7, a6, a5, a4, a3, a2, a1, a0) \ + _v512_set_epu32(((unsigned)(a63)<<24)|((unsigned)(a62)<<16)|((unsigned)(a61)<<8)|(unsigned)(a60),((unsigned)(a59)<<24)|((unsigned)(a58)<<16)|((unsigned)(a57)<<8)|(unsigned)(a56), \ + ((unsigned)(a55)<<24)|((unsigned)(a54)<<16)|((unsigned)(a53)<<8)|(unsigned)(a52),((unsigned)(a51)<<24)|((unsigned)(a50)<<16)|((unsigned)(a49)<<8)|(unsigned)(a48), \ + ((unsigned)(a47)<<24)|((unsigned)(a46)<<16)|((unsigned)(a45)<<8)|(unsigned)(a44),((unsigned)(a43)<<24)|((unsigned)(a42)<<16)|((unsigned)(a41)<<8)|(unsigned)(a40), \ + ((unsigned)(a39)<<24)|((unsigned)(a38)<<16)|((unsigned)(a37)<<8)|(unsigned)(a36),((unsigned)(a35)<<24)|((unsigned)(a34)<<16)|((unsigned)(a33)<<8)|(unsigned)(a32), \ + ((unsigned)(a31)<<24)|((unsigned)(a30)<<16)|((unsigned)(a29)<<8)|(unsigned)(a28),((unsigned)(a27)<<24)|((unsigned)(a26)<<16)|((unsigned)(a25)<<8)|(unsigned)(a24), \ + ((unsigned)(a23)<<24)|((unsigned)(a22)<<16)|((unsigned)(a21)<<8)|(unsigned)(a20),((unsigned)(a19)<<24)|((unsigned)(a18)<<16)|((unsigned)(a17)<<8)|(unsigned)(a16), \ + ((unsigned)(a15)<<24)|((unsigned)(a14)<<16)|((unsigned)(a13)<<8)|(unsigned)(a12),((unsigned)(a11)<<24)|((unsigned)(a10)<<16)|((unsigned)( a9)<<8)|(unsigned)( a8), \ + ((unsigned)( a7)<<24)|((unsigned)( a6)<<16)|((unsigned)( a5)<<8)|(unsigned)( a4),((unsigned)( a3)<<24)|((unsigned)( a2)<<16)|((unsigned)( a1)<<8)|(unsigned)( a0)) +#define _v512_set_epi8(a63, a62, a61, a60, a59, a58, a57, a56, a55, a54, a53, a52, a51, a50, a49, a48, \ + a47, a46, a45, a44, a43, a42, a41, a40, a39, a38, a37, a36, a35, a34, a33, a32, \ + a31, a30, a29, a28, a27, a26, a25, a24, a23, a22, a21, a20, a19, a18, a17, a16, \ + a15, a14, a13, a12, a11, a10, a9, a8, a7, a6, a5, a4, a3, a2, a1, a0) \ + _v512_set_epu8((uchar)(a63), (uchar)(a62), (uchar)(a61), (uchar)(a60), (uchar)(a59), (uchar)(a58), (uchar)(a57), (uchar)(a56), \ + (uchar)(a55), (uchar)(a54), (uchar)(a53), (uchar)(a52), (uchar)(a51), (uchar)(a50), (uchar)(a49), (uchar)(a48), \ + (uchar)(a47), (uchar)(a46), (uchar)(a45), (uchar)(a44), (uchar)(a43), (uchar)(a42), (uchar)(a41), (uchar)(a40), \ + (uchar)(a39), (uchar)(a38), (uchar)(a37), (uchar)(a36), (uchar)(a35), (uchar)(a34), (uchar)(a33), (uchar)(a32), \ + (uchar)(a31), (uchar)(a30), (uchar)(a29), (uchar)(a28), (uchar)(a27), (uchar)(a26), (uchar)(a25), (uchar)(a24), \ + (uchar)(a23), (uchar)(a22), (uchar)(a21), (uchar)(a20), (uchar)(a19), (uchar)(a18), (uchar)(a17), (uchar)(a16), \ + (uchar)(a15), (uchar)(a14), (uchar)(a13), (uchar)(a12), (uchar)(a11), (uchar)(a10), (uchar)( a9), (uchar)( a8), \ + (uchar)( a7), (uchar)( a6), (uchar)( a5), (uchar)( a4), (uchar)( a3), (uchar)( a2), (uchar)( a1), (uchar)( a0)) + +#ifndef _mm512_cvtpd_pslo +#ifdef _mm512_zextsi256_si512 +#define _mm512_cvtpd_pslo(a) _mm512_zextps256_ps512(_mm512_cvtpd_ps(a)) +#else +//if preferred way to extend with zeros is unavailable +#define _mm512_cvtpd_pslo(a) _mm512_castps256_ps512(_mm512_cvtpd_ps(a)) +#endif +#endif +///////// Utils //////////// + +namespace +{ + +inline __m512i _v512_combine(const __m256i& lo, const __m256i& hi) +{ return _mm512_inserti32x8(_mm512_castsi256_si512(lo), hi, 1); } + +inline __m512 _v512_combine(const __m256& lo, const __m256& hi) +{ return _mm512_insertf32x8(_mm512_castps256_ps512(lo), hi, 1); } + +inline __m512d _v512_combine(const __m256d& lo, const __m256d& hi) +{ return _mm512_insertf64x4(_mm512_castpd256_pd512(lo), hi, 1); } + +inline int _v_cvtsi512_si32(const __m512i& a) +{ return _mm_cvtsi128_si32(_mm512_castsi512_si128(a)); } + +inline __m256i _v512_extract_high(const __m512i& v) +{ return _mm512_extracti32x8_epi32(v, 1); } + +inline __m256 _v512_extract_high(const __m512& v) +{ return _mm512_extractf32x8_ps(v, 1); } + +inline __m256d _v512_extract_high(const __m512d& v) +{ return _mm512_extractf64x4_pd(v, 1); } + +inline __m256i _v512_extract_low(const __m512i& v) +{ return _mm512_castsi512_si256(v); } + +inline __m256 _v512_extract_low(const __m512& v) +{ return _mm512_castps512_ps256(v); } + +inline __m256d _v512_extract_low(const __m512d& v) +{ return _mm512_castpd512_pd256(v); } + +inline __m512i _v512_insert(const __m512i& a, const __m256i& b) +{ return _mm512_inserti32x8(a, b, 0); } + +inline __m512 _v512_insert(const __m512& a, const __m256& b) +{ return _mm512_insertf32x8(a, b, 0); } + +inline __m512d _v512_insert(const __m512d& a, const __m256d& b) +{ return _mm512_insertf64x4(a, b, 0); } + +} + +namespace cv +{ + +//! @cond IGNORED + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + +///////// Types //////////// + +struct v_uint8x64 +{ + typedef uchar lane_type; + enum { nlanes = 64 }; + __m512i val; + + explicit v_uint8x64(__m512i v) : val(v) {} + v_uint8x64(uchar v0, uchar v1, uchar v2, uchar v3, + uchar v4, uchar v5, uchar v6, uchar v7, + uchar v8, uchar v9, uchar v10, uchar v11, + uchar v12, uchar v13, uchar v14, uchar v15, + uchar v16, uchar v17, uchar v18, uchar v19, + uchar v20, uchar v21, uchar v22, uchar v23, + uchar v24, uchar v25, uchar v26, uchar v27, + uchar v28, uchar v29, uchar v30, uchar v31, + uchar v32, uchar v33, uchar v34, uchar v35, + uchar v36, uchar v37, uchar v38, uchar v39, + uchar v40, uchar v41, uchar v42, uchar v43, + uchar v44, uchar v45, uchar v46, uchar v47, + uchar v48, uchar v49, uchar v50, uchar v51, + uchar v52, uchar v53, uchar v54, uchar v55, + uchar v56, uchar v57, uchar v58, uchar v59, + uchar v60, uchar v61, uchar v62, uchar v63) + { + val = _v512_set_epu8(v63, v62, v61, v60, v59, v58, v57, v56, v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, v7, v6, v5, v4, v3, v2, v1, v0); + } + v_uint8x64() : val(_mm512_setzero_si512()) {} + uchar get0() const { return (uchar)_v_cvtsi512_si32(val); } +}; + +struct v_int8x64 +{ + typedef schar lane_type; + enum { nlanes = 64 }; + __m512i val; + + explicit v_int8x64(__m512i v) : val(v) {} + v_int8x64(schar v0, schar v1, schar v2, schar v3, + schar v4, schar v5, schar v6, schar v7, + schar v8, schar v9, schar v10, schar v11, + schar v12, schar v13, schar v14, schar v15, + schar v16, schar v17, schar v18, schar v19, + schar v20, schar v21, schar v22, schar v23, + schar v24, schar v25, schar v26, schar v27, + schar v28, schar v29, schar v30, schar v31, + schar v32, schar v33, schar v34, schar v35, + schar v36, schar v37, schar v38, schar v39, + schar v40, schar v41, schar v42, schar v43, + schar v44, schar v45, schar v46, schar v47, + schar v48, schar v49, schar v50, schar v51, + schar v52, schar v53, schar v54, schar v55, + schar v56, schar v57, schar v58, schar v59, + schar v60, schar v61, schar v62, schar v63) + { + val = _v512_set_epi8(v63, v62, v61, v60, v59, v58, v57, v56, v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, v7, v6, v5, v4, v3, v2, v1, v0); + } + v_int8x64() : val(_mm512_setzero_si512()) {} + schar get0() const { return (schar)_v_cvtsi512_si32(val); } +}; + +struct v_uint16x32 +{ + typedef ushort lane_type; + enum { nlanes = 32 }; + __m512i val; + + explicit v_uint16x32(__m512i v) : val(v) {} + v_uint16x32(ushort v0, ushort v1, ushort v2, ushort v3, + ushort v4, ushort v5, ushort v6, ushort v7, + ushort v8, ushort v9, ushort v10, ushort v11, + ushort v12, ushort v13, ushort v14, ushort v15, + ushort v16, ushort v17, ushort v18, ushort v19, + ushort v20, ushort v21, ushort v22, ushort v23, + ushort v24, ushort v25, ushort v26, ushort v27, + ushort v28, ushort v29, ushort v30, ushort v31) + { + val = _v512_set_epu16(v31, v30, v29, v28, v27, v26, v25, v24, v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, v7, v6, v5, v4, v3, v2, v1, v0); + } + v_uint16x32() : val(_mm512_setzero_si512()) {} + ushort get0() const { return (ushort)_v_cvtsi512_si32(val); } +}; + +struct v_int16x32 +{ + typedef short lane_type; + enum { nlanes = 32 }; + __m512i val; + + explicit v_int16x32(__m512i v) : val(v) {} + v_int16x32(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7, + short v8, short v9, short v10, short v11, short v12, short v13, short v14, short v15, + short v16, short v17, short v18, short v19, short v20, short v21, short v22, short v23, + short v24, short v25, short v26, short v27, short v28, short v29, short v30, short v31) + { + val = _v512_set_epu16((ushort)v31, (ushort)v30, (ushort)v29, (ushort)v28, (ushort)v27, (ushort)v26, (ushort)v25, (ushort)v24, + (ushort)v23, (ushort)v22, (ushort)v21, (ushort)v20, (ushort)v19, (ushort)v18, (ushort)v17, (ushort)v16, + (ushort)v15, (ushort)v14, (ushort)v13, (ushort)v12, (ushort)v11, (ushort)v10, (ushort)v9 , (ushort)v8, + (ushort)v7 , (ushort)v6 , (ushort)v5 , (ushort)v4 , (ushort)v3 , (ushort)v2 , (ushort)v1 , (ushort)v0); + } + v_int16x32() : val(_mm512_setzero_si512()) {} + short get0() const { return (short)_v_cvtsi512_si32(val); } +}; + +struct v_uint32x16 +{ + typedef unsigned lane_type; + enum { nlanes = 16 }; + __m512i val; + + explicit v_uint32x16(__m512i v) : val(v) {} + v_uint32x16(unsigned v0, unsigned v1, unsigned v2, unsigned v3, + unsigned v4, unsigned v5, unsigned v6, unsigned v7, + unsigned v8, unsigned v9, unsigned v10, unsigned v11, + unsigned v12, unsigned v13, unsigned v14, unsigned v15) + { + val = _mm512_setr_epi32((int)v0, (int)v1, (int)v2, (int)v3, (int)v4, (int)v5, (int)v6, (int)v7, + (int)v8, (int)v9, (int)v10, (int)v11, (int)v12, (int)v13, (int)v14, (int)v15); + } + v_uint32x16() : val(_mm512_setzero_si512()) {} + unsigned get0() const { return (unsigned)_v_cvtsi512_si32(val); } +}; + +struct v_int32x16 +{ + typedef int lane_type; + enum { nlanes = 16 }; + __m512i val; + + explicit v_int32x16(__m512i v) : val(v) {} + v_int32x16(int v0, int v1, int v2, int v3, int v4, int v5, int v6, int v7, + int v8, int v9, int v10, int v11, int v12, int v13, int v14, int v15) + { + val = _mm512_setr_epi32(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15); + } + v_int32x16() : val(_mm512_setzero_si512()) {} + int get0() const { return _v_cvtsi512_si32(val); } +}; + +struct v_float32x16 +{ + typedef float lane_type; + enum { nlanes = 16 }; + __m512 val; + + explicit v_float32x16(__m512 v) : val(v) {} + v_float32x16(float v0, float v1, float v2, float v3, float v4, float v5, float v6, float v7, + float v8, float v9, float v10, float v11, float v12, float v13, float v14, float v15) + { + val = _mm512_setr_ps(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15); + } + v_float32x16() : val(_mm512_setzero_ps()) {} + float get0() const { return _mm_cvtss_f32(_mm512_castps512_ps128(val)); } +}; + +struct v_uint64x8 +{ + typedef uint64 lane_type; + enum { nlanes = 8 }; + __m512i val; + + explicit v_uint64x8(__m512i v) : val(v) {} + v_uint64x8(uint64 v0, uint64 v1, uint64 v2, uint64 v3, uint64 v4, uint64 v5, uint64 v6, uint64 v7) + { val = _mm512_setr_epi64((int64)v0, (int64)v1, (int64)v2, (int64)v3, (int64)v4, (int64)v5, (int64)v6, (int64)v7); } + v_uint64x8() : val(_mm512_setzero_si512()) {} + uint64 get0() const + { + #if defined __x86_64__ || defined _M_X64 + return (uint64)_mm_cvtsi128_si64(_mm512_castsi512_si128(val)); + #else + int a = _mm_cvtsi128_si32(_mm512_castsi512_si128(val)); + int b = _mm_cvtsi128_si32(_mm512_castsi512_si128(_mm512_srli_epi64(val, 32))); + return (unsigned)a | ((uint64)(unsigned)b << 32); + #endif + } +}; + +struct v_int64x8 +{ + typedef int64 lane_type; + enum { nlanes = 8 }; + __m512i val; + + explicit v_int64x8(__m512i v) : val(v) {} + v_int64x8(int64 v0, int64 v1, int64 v2, int64 v3, int64 v4, int64 v5, int64 v6, int64 v7) + { val = _mm512_setr_epi64(v0, v1, v2, v3, v4, v5, v6, v7); } + v_int64x8() : val(_mm512_setzero_si512()) {} + + int64 get0() const + { + #if defined __x86_64__ || defined _M_X64 + return (int64)_mm_cvtsi128_si64(_mm512_castsi512_si128(val)); + #else + int a = _mm_cvtsi128_si32(_mm512_castsi512_si128(val)); + int b = _mm_cvtsi128_si32(_mm512_castsi512_si128(_mm512_srli_epi64(val, 32))); + return (int64)((unsigned)a | ((uint64)(unsigned)b << 32)); + #endif + } +}; + +struct v_float64x8 +{ + typedef double lane_type; + enum { nlanes = 8 }; + __m512d val; + + explicit v_float64x8(__m512d v) : val(v) {} + v_float64x8(double v0, double v1, double v2, double v3, double v4, double v5, double v6, double v7) + { val = _mm512_setr_pd(v0, v1, v2, v3, v4, v5, v6, v7); } + v_float64x8() : val(_mm512_setzero_pd()) {} + double get0() const { return _mm_cvtsd_f64(_mm512_castpd512_pd128(val)); } +}; + +//////////////// Load and store operations /////////////// + +#define OPENCV_HAL_IMPL_AVX512_LOADSTORE(_Tpvec, _Tp) \ + inline _Tpvec v512_load(const _Tp* ptr) \ + { return _Tpvec(_mm512_loadu_si512((const __m512i*)ptr)); } \ + inline _Tpvec v512_load_aligned(const _Tp* ptr) \ + { return _Tpvec(_mm512_load_si512((const __m512i*)ptr)); } \ + inline _Tpvec v512_load_low(const _Tp* ptr) \ + { \ + __m256i v256 = _mm256_loadu_si256((const __m256i*)ptr); \ + return _Tpvec(_mm512_castsi256_si512(v256)); \ + } \ + inline _Tpvec v512_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ + { \ + __m256i vlo = _mm256_loadu_si256((const __m256i*)ptr0); \ + __m256i vhi = _mm256_loadu_si256((const __m256i*)ptr1); \ + return _Tpvec(_v512_combine(vlo, vhi)); \ + } \ + inline void v_store(_Tp* ptr, const _Tpvec& a) \ + { _mm512_storeu_si512((__m512i*)ptr, a.val); } \ + inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ + { _mm512_store_si512((__m512i*)ptr, a.val); } \ + inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ + { _mm512_stream_si512((__m512i*)ptr, a.val); } \ + inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \ + { \ + if( mode == hal::STORE_UNALIGNED ) \ + _mm512_storeu_si512((__m512i*)ptr, a.val); \ + else if( mode == hal::STORE_ALIGNED_NOCACHE ) \ + _mm512_stream_si512((__m512i*)ptr, a.val); \ + else \ + _mm512_store_si512((__m512i*)ptr, a.val); \ + } \ + inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ + { _mm256_storeu_si256((__m256i*)ptr, _v512_extract_low(a.val)); } \ + inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ + { _mm256_storeu_si256((__m256i*)ptr, _v512_extract_high(a.val)); } + +OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_uint8x64, uchar) +OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_int8x64, schar) +OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_uint16x32, ushort) +OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_int16x32, short) +OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_uint32x16, unsigned) +OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_int32x16, int) +OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_uint64x8, uint64) +OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_int64x8, int64) + +#define OPENCV_HAL_IMPL_AVX512_LOADSTORE_FLT(_Tpvec, _Tp, suffix, halfreg) \ + inline _Tpvec v512_load(const _Tp* ptr) \ + { return _Tpvec(_mm512_loadu_##suffix(ptr)); } \ + inline _Tpvec v512_load_aligned(const _Tp* ptr) \ + { return _Tpvec(_mm512_load_##suffix(ptr)); } \ + inline _Tpvec v512_load_low(const _Tp* ptr) \ + { \ + return _Tpvec(_mm512_cast##suffix##256_##suffix##512 \ + (_mm256_loadu_##suffix(ptr))); \ + } \ + inline _Tpvec v512_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ + { \ + halfreg vlo = _mm256_loadu_##suffix(ptr0); \ + halfreg vhi = _mm256_loadu_##suffix(ptr1); \ + return _Tpvec(_v512_combine(vlo, vhi)); \ + } \ + inline void v_store(_Tp* ptr, const _Tpvec& a) \ + { _mm512_storeu_##suffix(ptr, a.val); } \ + inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ + { _mm512_store_##suffix(ptr, a.val); } \ + inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ + { _mm512_stream_##suffix(ptr, a.val); } \ + inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \ + { \ + if( mode == hal::STORE_UNALIGNED ) \ + _mm512_storeu_##suffix(ptr, a.val); \ + else if( mode == hal::STORE_ALIGNED_NOCACHE ) \ + _mm512_stream_##suffix(ptr, a.val); \ + else \ + _mm512_store_##suffix(ptr, a.val); \ + } \ + inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ + { _mm256_storeu_##suffix(ptr, _v512_extract_low(a.val)); } \ + inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ + { _mm256_storeu_##suffix(ptr, _v512_extract_high(a.val)); } + +OPENCV_HAL_IMPL_AVX512_LOADSTORE_FLT(v_float32x16, float, ps, __m256) +OPENCV_HAL_IMPL_AVX512_LOADSTORE_FLT(v_float64x8, double, pd, __m256d) + +#define OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, _Tpvecf, suffix, cast) \ + inline _Tpvec v_reinterpret_as_##suffix(const _Tpvecf& a) \ + { return _Tpvec(cast(a.val)); } + +#define OPENCV_HAL_IMPL_AVX512_INIT(_Tpvec, _Tp, suffix, ssuffix, ctype_s) \ + inline _Tpvec v512_setzero_##suffix() \ + { return _Tpvec(_mm512_setzero_si512()); } \ + inline _Tpvec v512_setall_##suffix(_Tp v) \ + { return _Tpvec(_mm512_set1_##ssuffix((ctype_s)v)); } \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint8x64, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int8x64, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint16x32, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int16x32, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint32x16, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int32x16, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint64x8, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int64x8, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_float32x16, suffix, _mm512_castps_si512) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_float64x8, suffix, _mm512_castpd_si512) + +OPENCV_HAL_IMPL_AVX512_INIT(v_uint8x64, uchar, u8, epi8, char) +OPENCV_HAL_IMPL_AVX512_INIT(v_int8x64, schar, s8, epi8, char) +OPENCV_HAL_IMPL_AVX512_INIT(v_uint16x32, ushort, u16, epi16, short) +OPENCV_HAL_IMPL_AVX512_INIT(v_int16x32, short, s16, epi16, short) +OPENCV_HAL_IMPL_AVX512_INIT(v_uint32x16, unsigned, u32, epi32, int) +OPENCV_HAL_IMPL_AVX512_INIT(v_int32x16, int, s32, epi32, int) +OPENCV_HAL_IMPL_AVX512_INIT(v_uint64x8, uint64, u64, epi64, int64) +OPENCV_HAL_IMPL_AVX512_INIT(v_int64x8, int64, s64, epi64, int64) + +#define OPENCV_HAL_IMPL_AVX512_INIT_FLT(_Tpvec, _Tp, suffix, zsuffix, cast) \ + inline _Tpvec v512_setzero_##suffix() \ + { return _Tpvec(_mm512_setzero_##zsuffix()); } \ + inline _Tpvec v512_setall_##suffix(_Tp v) \ + { return _Tpvec(_mm512_set1_##zsuffix(v)); } \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint8x64, suffix, cast) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int8x64, suffix, cast) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint16x32, suffix, cast) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int16x32, suffix, cast) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint32x16, suffix, cast) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int32x16, suffix, cast) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint64x8, suffix, cast) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int64x8, suffix, cast) + +OPENCV_HAL_IMPL_AVX512_INIT_FLT(v_float32x16, float, f32, ps, _mm512_castsi512_ps) +OPENCV_HAL_IMPL_AVX512_INIT_FLT(v_float64x8, double, f64, pd, _mm512_castsi512_pd) + +inline v_float32x16 v_reinterpret_as_f32(const v_float32x16& a) +{ return a; } +inline v_float32x16 v_reinterpret_as_f32(const v_float64x8& a) +{ return v_float32x16(_mm512_castpd_ps(a.val)); } + +inline v_float64x8 v_reinterpret_as_f64(const v_float64x8& a) +{ return a; } +inline v_float64x8 v_reinterpret_as_f64(const v_float32x16& a) +{ return v_float64x8(_mm512_castps_pd(a.val)); } + +// FP16 +inline v_float32x16 v512_load_expand(const float16_t* ptr) +{ + return v_float32x16(_mm512_cvtph_ps(_mm256_loadu_si256((const __m256i*)ptr))); +} + +inline void v_pack_store(float16_t* ptr, const v_float32x16& a) +{ + __m256i ah = _mm512_cvtps_ph(a.val, 0); + _mm256_storeu_si256((__m256i*)ptr, ah); +} + +/* Recombine & ZIP */ +inline void v_zip(const v_int8x64& a, const v_int8x64& b, v_int8x64& ab0, v_int8x64& ab1) +{ +#if CV_AVX_512VBMI + __m512i mask0 = _v512_set_epu8( 95, 31, 94, 30, 93, 29, 92, 28, 91, 27, 90, 26, 89, 25, 88, 24, + 87, 23, 86, 22, 85, 21, 84, 20, 83, 19, 82, 18, 81, 17, 80, 16, + 79, 15, 78, 14, 77, 13, 76, 12, 75, 11, 74, 10, 73, 9, 72, 8, + 71, 7, 70, 6, 69, 5, 68, 4, 67, 3, 66, 2, 65, 1, 64, 0); + ab0 = v_int8x64(_mm512_permutex2var_epi8(a.val, mask0, b.val)); + __m512i mask1 = _v512_set_epu8(127, 63, 126, 62, 125, 61, 124, 60, 123, 59, 122, 58, 121, 57, 120, 56, + 119, 55, 118, 54, 117, 53, 116, 52, 115, 51, 114, 50, 113, 49, 112, 48, + 111, 47, 110, 46, 109, 45, 108, 44, 107, 43, 106, 42, 105, 41, 104, 40, + 103, 39, 102, 38, 101, 37, 100, 36, 99, 35, 98, 34, 97, 33, 96, 32); + ab1 = v_int8x64(_mm512_permutex2var_epi8(a.val, mask1, b.val)); +#else + __m512i low = _mm512_unpacklo_epi8(a.val, b.val); + __m512i high = _mm512_unpackhi_epi8(a.val, b.val); + ab0 = v_int8x64(_mm512_permutex2var_epi64(low, _v512_set_epu64(11, 10, 3, 2, 9, 8, 1, 0), high)); + ab1 = v_int8x64(_mm512_permutex2var_epi64(low, _v512_set_epu64(15, 14, 7, 6, 13, 12, 5, 4), high)); +#endif +} +inline void v_zip(const v_int16x32& a, const v_int16x32& b, v_int16x32& ab0, v_int16x32& ab1) +{ + __m512i mask0 = _v512_set_epu16(47, 15, 46, 14, 45, 13, 44, 12, 43, 11, 42, 10, 41, 9, 40, 8, + 39, 7, 38, 6, 37, 5, 36, 4, 35, 3, 34, 2, 33, 1, 32, 0); + ab0 = v_int16x32(_mm512_permutex2var_epi16(a.val, mask0, b.val)); + __m512i mask1 = _v512_set_epu16(63, 31, 62, 30, 61, 29, 60, 28, 59, 27, 58, 26, 57, 25, 56, 24, + 55, 23, 54, 22, 53, 21, 52, 20, 51, 19, 50, 18, 49, 17, 48, 16); + ab1 = v_int16x32(_mm512_permutex2var_epi16(a.val, mask1, b.val)); +} +inline void v_zip(const v_int32x16& a, const v_int32x16& b, v_int32x16& ab0, v_int32x16& ab1) +{ + __m512i mask0 = _v512_set_epu32(23, 7, 22, 6, 21, 5, 20, 4, 19, 3, 18, 2, 17, 1, 16, 0); + ab0 = v_int32x16(_mm512_permutex2var_epi32(a.val, mask0, b.val)); + __m512i mask1 = _v512_set_epu32(31, 15, 30, 14, 29, 13, 28, 12, 27, 11, 26, 10, 25, 9, 24, 8); + ab1 = v_int32x16(_mm512_permutex2var_epi32(a.val, mask1, b.val)); +} +inline void v_zip(const v_int64x8& a, const v_int64x8& b, v_int64x8& ab0, v_int64x8& ab1) +{ + __m512i mask0 = _v512_set_epu64(11, 3, 10, 2, 9, 1, 8, 0); + ab0 = v_int64x8(_mm512_permutex2var_epi64(a.val, mask0, b.val)); + __m512i mask1 = _v512_set_epu64(15, 7, 14, 6, 13, 5, 12, 4); + ab1 = v_int64x8(_mm512_permutex2var_epi64(a.val, mask1, b.val)); +} + +inline void v_zip(const v_uint8x64& a, const v_uint8x64& b, v_uint8x64& ab0, v_uint8x64& ab1) +{ + v_int8x64 i0, i1; + v_zip(v_reinterpret_as_s8(a), v_reinterpret_as_s8(b), i0, i1); + ab0 = v_reinterpret_as_u8(i0); + ab1 = v_reinterpret_as_u8(i1); +} +inline void v_zip(const v_uint16x32& a, const v_uint16x32& b, v_uint16x32& ab0, v_uint16x32& ab1) +{ + v_int16x32 i0, i1; + v_zip(v_reinterpret_as_s16(a), v_reinterpret_as_s16(b), i0, i1); + ab0 = v_reinterpret_as_u16(i0); + ab1 = v_reinterpret_as_u16(i1); +} +inline void v_zip(const v_uint32x16& a, const v_uint32x16& b, v_uint32x16& ab0, v_uint32x16& ab1) +{ + v_int32x16 i0, i1; + v_zip(v_reinterpret_as_s32(a), v_reinterpret_as_s32(b), i0, i1); + ab0 = v_reinterpret_as_u32(i0); + ab1 = v_reinterpret_as_u32(i1); +} +inline void v_zip(const v_uint64x8& a, const v_uint64x8& b, v_uint64x8& ab0, v_uint64x8& ab1) +{ + v_int64x8 i0, i1; + v_zip(v_reinterpret_as_s64(a), v_reinterpret_as_s64(b), i0, i1); + ab0 = v_reinterpret_as_u64(i0); + ab1 = v_reinterpret_as_u64(i1); +} +inline void v_zip(const v_float32x16& a, const v_float32x16& b, v_float32x16& ab0, v_float32x16& ab1) +{ + v_int32x16 i0, i1; + v_zip(v_reinterpret_as_s32(a), v_reinterpret_as_s32(b), i0, i1); + ab0 = v_reinterpret_as_f32(i0); + ab1 = v_reinterpret_as_f32(i1); +} +inline void v_zip(const v_float64x8& a, const v_float64x8& b, v_float64x8& ab0, v_float64x8& ab1) +{ + v_int64x8 i0, i1; + v_zip(v_reinterpret_as_s64(a), v_reinterpret_as_s64(b), i0, i1); + ab0 = v_reinterpret_as_f64(i0); + ab1 = v_reinterpret_as_f64(i1); +} + +#define OPENCV_HAL_IMPL_AVX512_COMBINE(_Tpvec, suffix) \ + inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_v512_combine(_v512_extract_low(a.val), _v512_extract_low(b.val))); } \ + inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_v512_insert(b.val, _v512_extract_high(a.val))); } \ + inline void v_recombine(const _Tpvec& a, const _Tpvec& b, \ + _Tpvec& c, _Tpvec& d) \ + { \ + c.val = _v512_combine(_v512_extract_low(a.val),_v512_extract_low(b.val)); \ + d.val = _v512_insert(b.val,_v512_extract_high(a.val)); \ + } + + +OPENCV_HAL_IMPL_AVX512_COMBINE(v_uint8x64, epi8) +OPENCV_HAL_IMPL_AVX512_COMBINE(v_int8x64, epi8) +OPENCV_HAL_IMPL_AVX512_COMBINE(v_uint16x32, epi16) +OPENCV_HAL_IMPL_AVX512_COMBINE(v_int16x32, epi16) +OPENCV_HAL_IMPL_AVX512_COMBINE(v_uint32x16, epi32) +OPENCV_HAL_IMPL_AVX512_COMBINE(v_int32x16, epi32) +OPENCV_HAL_IMPL_AVX512_COMBINE(v_uint64x8, epi64) +OPENCV_HAL_IMPL_AVX512_COMBINE(v_int64x8, epi64) +OPENCV_HAL_IMPL_AVX512_COMBINE(v_float32x16, ps) +OPENCV_HAL_IMPL_AVX512_COMBINE(v_float64x8, pd) + +////////// Arithmetic, bitwise and comparison operations ///////// + +/* Element-wise binary and unary operations */ + +/** Non-saturating arithmetics **/ +#define OPENCV_HAL_IMPL_AVX512_BIN_FUNC(func, _Tpvec, intrin) \ + inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(intrin(a.val, b.val)); } + +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_add_wrap, v_uint8x64, _mm512_add_epi8) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_add_wrap, v_int8x64, _mm512_add_epi8) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_add_wrap, v_uint16x32, _mm512_add_epi16) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_add_wrap, v_int16x32, _mm512_add_epi16) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_sub_wrap, v_uint8x64, _mm512_sub_epi8) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_sub_wrap, v_int8x64, _mm512_sub_epi8) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_sub_wrap, v_uint16x32, _mm512_sub_epi16) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_sub_wrap, v_int16x32, _mm512_sub_epi16) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_mul_wrap, v_uint16x32, _mm512_mullo_epi16) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_mul_wrap, v_int16x32, _mm512_mullo_epi16) + +inline v_uint8x64 v_mul_wrap(const v_uint8x64& a, const v_uint8x64& b) +{ + __m512i ad = _mm512_srai_epi16(a.val, 8); + __m512i bd = _mm512_srai_epi16(b.val, 8); + __m512i p0 = _mm512_mullo_epi16(a.val, b.val); // even + __m512i p1 = _mm512_slli_epi16(_mm512_mullo_epi16(ad, bd), 8); // odd + return v_uint8x64(_mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, p0, p1)); +} +inline v_int8x64 v_mul_wrap(const v_int8x64& a, const v_int8x64& b) +{ + return v_reinterpret_as_s8(v_mul_wrap(v_reinterpret_as_u8(a), v_reinterpret_as_u8(b))); +} + +#define OPENCV_HAL_IMPL_AVX512_BIN_OP(bin_op, _Tpvec, intrin) \ + inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(intrin(a.val, b.val)); } \ + inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \ + { a.val = intrin(a.val, b.val); return a; } + +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_uint32x16, _mm512_add_epi32) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_uint32x16, _mm512_sub_epi32) +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_int32x16, _mm512_add_epi32) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_int32x16, _mm512_sub_epi32) +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_uint64x8, _mm512_add_epi64) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_uint64x8, _mm512_sub_epi64) +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_int64x8, _mm512_add_epi64) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_int64x8, _mm512_sub_epi64) + +OPENCV_HAL_IMPL_AVX512_BIN_OP(*, v_uint32x16, _mm512_mullo_epi32) +OPENCV_HAL_IMPL_AVX512_BIN_OP(*, v_int32x16, _mm512_mullo_epi32) +OPENCV_HAL_IMPL_AVX512_BIN_OP(*, v_uint64x8, _mm512_mullo_epi64) +OPENCV_HAL_IMPL_AVX512_BIN_OP(*, v_int64x8, _mm512_mullo_epi64) + +/** Saturating arithmetics **/ +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_uint8x64, _mm512_adds_epu8) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_uint8x64, _mm512_subs_epu8) +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_int8x64, _mm512_adds_epi8) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_int8x64, _mm512_subs_epi8) +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_uint16x32, _mm512_adds_epu16) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_uint16x32, _mm512_subs_epu16) +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_int16x32, _mm512_adds_epi16) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_int16x32, _mm512_subs_epi16) + +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_float32x16, _mm512_add_ps) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_float32x16, _mm512_sub_ps) +OPENCV_HAL_IMPL_AVX512_BIN_OP(*, v_float32x16, _mm512_mul_ps) +OPENCV_HAL_IMPL_AVX512_BIN_OP(/, v_float32x16, _mm512_div_ps) +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_float64x8, _mm512_add_pd) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_float64x8, _mm512_sub_pd) +OPENCV_HAL_IMPL_AVX512_BIN_OP(*, v_float64x8, _mm512_mul_pd) +OPENCV_HAL_IMPL_AVX512_BIN_OP(/, v_float64x8, _mm512_div_pd) + +// saturating multiply +inline v_uint8x64 operator * (const v_uint8x64& a, const v_uint8x64& b) +{ + v_uint16x32 c, d; + v_mul_expand(a, b, c, d); + return v_pack(c, d); +} +inline v_int8x64 operator * (const v_int8x64& a, const v_int8x64& b) +{ + v_int16x32 c, d; + v_mul_expand(a, b, c, d); + return v_pack(c, d); +} +inline v_uint16x32 operator * (const v_uint16x32& a, const v_uint16x32& b) +{ + __m512i pl = _mm512_mullo_epi16(a.val, b.val); + __m512i ph = _mm512_mulhi_epu16(a.val, b.val); + __m512i p0 = _mm512_unpacklo_epi16(pl, ph); + __m512i p1 = _mm512_unpackhi_epi16(pl, ph); + + const __m512i m = _mm512_set1_epi32(65535); + return v_uint16x32(_mm512_packus_epi32(_mm512_min_epu32(p0, m), _mm512_min_epu32(p1, m))); +} +inline v_int16x32 operator * (const v_int16x32& a, const v_int16x32& b) +{ + __m512i pl = _mm512_mullo_epi16(a.val, b.val); + __m512i ph = _mm512_mulhi_epi16(a.val, b.val); + __m512i p0 = _mm512_unpacklo_epi16(pl, ph); + __m512i p1 = _mm512_unpackhi_epi16(pl, ph); + return v_int16x32(_mm512_packs_epi32(p0, p1)); +} + +inline v_uint8x64& operator *= (v_uint8x64& a, const v_uint8x64& b) +{ a = a * b; return a; } +inline v_int8x64& operator *= (v_int8x64& a, const v_int8x64& b) +{ a = a * b; return a; } +inline v_uint16x32& operator *= (v_uint16x32& a, const v_uint16x32& b) +{ a = a * b; return a; } +inline v_int16x32& operator *= (v_int16x32& a, const v_int16x32& b) +{ a = a * b; return a; } + +inline v_int16x32 v_mul_hi(const v_int16x32& a, const v_int16x32& b) { return v_int16x32(_mm512_mulhi_epi16(a.val, b.val)); } +inline v_uint16x32 v_mul_hi(const v_uint16x32& a, const v_uint16x32& b) { return v_uint16x32(_mm512_mulhi_epu16(a.val, b.val)); } + +// Multiply and expand +inline void v_mul_expand(const v_uint8x64& a, const v_uint8x64& b, + v_uint16x32& c, v_uint16x32& d) +{ + v_uint16x32 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c = v_mul_wrap(a0, b0); + d = v_mul_wrap(a1, b1); +} + +inline void v_mul_expand(const v_int8x64& a, const v_int8x64& b, + v_int16x32& c, v_int16x32& d) +{ + v_int16x32 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c = v_mul_wrap(a0, b0); + d = v_mul_wrap(a1, b1); +} + +inline void v_mul_expand(const v_int16x32& a, const v_int16x32& b, + v_int32x16& c, v_int32x16& d) +{ + v_int16x32 v0, v1; + v_zip(v_mul_wrap(a, b), v_mul_hi(a, b), v0, v1); + + c = v_reinterpret_as_s32(v0); + d = v_reinterpret_as_s32(v1); +} + +inline void v_mul_expand(const v_uint16x32& a, const v_uint16x32& b, + v_uint32x16& c, v_uint32x16& d) +{ + v_uint16x32 v0, v1; + v_zip(v_mul_wrap(a, b), v_mul_hi(a, b), v0, v1); + + c = v_reinterpret_as_u32(v0); + d = v_reinterpret_as_u32(v1); +} + +inline void v_mul_expand(const v_uint32x16& a, const v_uint32x16& b, + v_uint64x8& c, v_uint64x8& d) +{ + v_zip(v_uint64x8(_mm512_mul_epu32(a.val, b.val)), + v_uint64x8(_mm512_mul_epu32(_mm512_srli_epi64(a.val, 32), _mm512_srli_epi64(b.val, 32))), c, d); +} + +inline void v_mul_expand(const v_int32x16& a, const v_int32x16& b, + v_int64x8& c, v_int64x8& d) +{ + v_zip(v_int64x8(_mm512_mul_epi32(a.val, b.val)), + v_int64x8(_mm512_mul_epi32(_mm512_srli_epi64(a.val, 32), _mm512_srli_epi64(b.val, 32))), c, d); +} + +/** Bitwise shifts **/ +#define OPENCV_HAL_IMPL_AVX512_SHIFT_OP(_Tpuvec, _Tpsvec, suffix) \ + inline _Tpuvec operator << (const _Tpuvec& a, int imm) \ + { return _Tpuvec(_mm512_slli_##suffix(a.val, imm)); } \ + inline _Tpsvec operator << (const _Tpsvec& a, int imm) \ + { return _Tpsvec(_mm512_slli_##suffix(a.val, imm)); } \ + inline _Tpuvec operator >> (const _Tpuvec& a, int imm) \ + { return _Tpuvec(_mm512_srli_##suffix(a.val, imm)); } \ + inline _Tpsvec operator >> (const _Tpsvec& a, int imm) \ + { return _Tpsvec(_mm512_srai_##suffix(a.val, imm)); } \ + template \ + inline _Tpuvec v_shl(const _Tpuvec& a) \ + { return _Tpuvec(_mm512_slli_##suffix(a.val, imm)); } \ + template \ + inline _Tpsvec v_shl(const _Tpsvec& a) \ + { return _Tpsvec(_mm512_slli_##suffix(a.val, imm)); } \ + template \ + inline _Tpuvec v_shr(const _Tpuvec& a) \ + { return _Tpuvec(_mm512_srli_##suffix(a.val, imm)); } \ + template \ + inline _Tpsvec v_shr(const _Tpsvec& a) \ + { return _Tpsvec(_mm512_srai_##suffix(a.val, imm)); } + +OPENCV_HAL_IMPL_AVX512_SHIFT_OP(v_uint16x32, v_int16x32, epi16) +OPENCV_HAL_IMPL_AVX512_SHIFT_OP(v_uint32x16, v_int32x16, epi32) +OPENCV_HAL_IMPL_AVX512_SHIFT_OP(v_uint64x8, v_int64x8, epi64) + + +/** Bitwise logic **/ +#define OPENCV_HAL_IMPL_AVX512_LOGIC_OP(_Tpvec, suffix, not_const) \ + OPENCV_HAL_IMPL_AVX512_BIN_OP(&, _Tpvec, _mm512_and_##suffix) \ + OPENCV_HAL_IMPL_AVX512_BIN_OP(|, _Tpvec, _mm512_or_##suffix) \ + OPENCV_HAL_IMPL_AVX512_BIN_OP(^, _Tpvec, _mm512_xor_##suffix) \ + inline _Tpvec operator ~ (const _Tpvec& a) \ + { return _Tpvec(_mm512_xor_##suffix(a.val, not_const)); } + +OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_uint8x64, si512, _mm512_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_int8x64, si512, _mm512_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_uint16x32, si512, _mm512_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_int16x32, si512, _mm512_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_uint32x16, si512, _mm512_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_int32x16, si512, _mm512_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_uint64x8, si512, _mm512_set1_epi64(-1)) +OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_int64x8, si512, _mm512_set1_epi64(-1)) +OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_float32x16, ps, _mm512_castsi512_ps(_mm512_set1_epi32(-1))) +OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_float64x8, pd, _mm512_castsi512_pd(_mm512_set1_epi32(-1))) + +/** Select **/ +#define OPENCV_HAL_IMPL_AVX512_SELECT(_Tpvec, suffix, zsuf) \ + inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_mm512_mask_blend_##suffix(_mm512_cmp_##suffix##_mask(mask.val, _mm512_setzero_##zsuf(), _MM_CMPINT_EQ), a.val, b.val)); } + +OPENCV_HAL_IMPL_AVX512_SELECT(v_uint8x64, epi8, si512) +OPENCV_HAL_IMPL_AVX512_SELECT(v_int8x64, epi8, si512) +OPENCV_HAL_IMPL_AVX512_SELECT(v_uint16x32, epi16, si512) +OPENCV_HAL_IMPL_AVX512_SELECT(v_int16x32, epi16, si512) +OPENCV_HAL_IMPL_AVX512_SELECT(v_uint32x16, epi32, si512) +OPENCV_HAL_IMPL_AVX512_SELECT(v_int32x16, epi32, si512) +OPENCV_HAL_IMPL_AVX512_SELECT(v_uint64x8, epi64, si512) +OPENCV_HAL_IMPL_AVX512_SELECT(v_int64x8, epi64, si512) +OPENCV_HAL_IMPL_AVX512_SELECT(v_float32x16, ps, ps) +OPENCV_HAL_IMPL_AVX512_SELECT(v_float64x8, pd, pd) + +/** Comparison **/ +#define OPENCV_HAL_IMPL_AVX512_CMP_INT(bin_op, imm8, _Tpvec, sufcmp, sufset, tval) \ + inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_mm512_maskz_set1_##sufset(_mm512_cmp_##sufcmp##_mask(a.val, b.val, imm8), tval)); } + +#define OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(_Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_INT(==, _MM_CMPINT_EQ, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_INT(!=, _MM_CMPINT_NE, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_INT(<, _MM_CMPINT_LT, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_INT(>, _MM_CMPINT_NLE, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_INT(<=, _MM_CMPINT_LE, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_INT(>=, _MM_CMPINT_NLT, _Tpvec, sufcmp, sufset, tval) + +OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_uint8x64, epu8, epi8, (char)-1) +OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_int8x64, epi8, epi8, (char)-1) +OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_uint16x32, epu16, epi16, (short)-1) +OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_int16x32, epi16, epi16, (short)-1) +OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_uint32x16, epu32, epi32, (int)-1) +OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_int32x16, epi32, epi32, (int)-1) +OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_uint64x8, epu64, epi64, (int64)-1) +OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_int64x8, epi64, epi64, (int64)-1) + +#define OPENCV_HAL_IMPL_AVX512_CMP_FLT(bin_op, imm8, _Tpvec, sufcmp, sufset, tval) \ + inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_mm512_castsi512_##sufcmp(_mm512_maskz_set1_##sufset(_mm512_cmp_##sufcmp##_mask(a.val, b.val, imm8), tval))); } + +#define OPENCV_HAL_IMPL_AVX512_CMP_OP_FLT(_Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_FLT(==, _CMP_EQ_OQ, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_FLT(!=, _CMP_NEQ_OQ, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_FLT(<, _CMP_LT_OQ, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_FLT(>, _CMP_GT_OQ, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_FLT(<=, _CMP_LE_OQ, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_FLT(>=, _CMP_GE_OQ, _Tpvec, sufcmp, sufset, tval) + +OPENCV_HAL_IMPL_AVX512_CMP_OP_FLT(v_float32x16, ps, epi32, (int)-1) +OPENCV_HAL_IMPL_AVX512_CMP_OP_FLT(v_float64x8, pd, epi64, (int64)-1) + +inline v_float32x16 v_not_nan(const v_float32x16& a) +{ return v_float32x16(_mm512_castsi512_ps(_mm512_maskz_set1_epi32(_mm512_cmp_ps_mask(a.val, a.val, _CMP_ORD_Q), (int)-1))); } +inline v_float64x8 v_not_nan(const v_float64x8& a) +{ return v_float64x8(_mm512_castsi512_pd(_mm512_maskz_set1_epi64(_mm512_cmp_pd_mask(a.val, a.val, _CMP_ORD_Q), (int64)-1))); } + +/** min/max **/ +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_uint8x64, _mm512_min_epu8) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_uint8x64, _mm512_max_epu8) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_int8x64, _mm512_min_epi8) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_int8x64, _mm512_max_epi8) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_uint16x32, _mm512_min_epu16) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_uint16x32, _mm512_max_epu16) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_int16x32, _mm512_min_epi16) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_int16x32, _mm512_max_epi16) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_uint32x16, _mm512_min_epu32) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_uint32x16, _mm512_max_epu32) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_int32x16, _mm512_min_epi32) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_int32x16, _mm512_max_epi32) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_uint64x8, _mm512_min_epu64) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_uint64x8, _mm512_max_epu64) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_int64x8, _mm512_min_epi64) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_int64x8, _mm512_max_epi64) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_float32x16, _mm512_min_ps) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_float32x16, _mm512_max_ps) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_float64x8, _mm512_min_pd) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_float64x8, _mm512_max_pd) + +/** Rotate **/ +namespace { + template + struct _v_rotate_right { static inline v_int8x64 eval(const v_int8x64&, const v_int8x64&) { return v_int8x64(); }}; + template + struct _v_rotate_right { static inline v_int8x64 eval(const v_int8x64& a, const v_int8x64& b) + { + return v_int8x64(_mm512_or_si512(_mm512_srli_epi32(_mm512_alignr_epi32(b.val, a.val, imm32 ), imm4 *8), + _mm512_slli_epi32(_mm512_alignr_epi32(b.val, a.val, imm32 + 1), (4-imm4)*8))); + }}; + template + struct _v_rotate_right { static inline v_int8x64 eval(const v_int8x64& a, const v_int8x64& b) + { + return v_int8x64(_mm512_or_si512(_mm512_srli_epi32(_mm512_alignr_epi32(b.val, a.val, 15), imm4 *8), + _mm512_slli_epi32( b.val, (4-imm4)*8))); + }}; + template + struct _v_rotate_right { static inline v_int8x64 eval(const v_int8x64&, const v_int8x64& b) + { + return v_int8x64(_mm512_or_si512(_mm512_srli_epi32(_mm512_alignr_epi32(_mm512_setzero_si512(), b.val, imm32 - 16), imm4 *8), + _mm512_slli_epi32(_mm512_alignr_epi32(_mm512_setzero_si512(), b.val, imm32 - 15), (4-imm4)*8))); + }}; + template + struct _v_rotate_right { static inline v_int8x64 eval(const v_int8x64&, const v_int8x64& b) + { return v_int8x64(_mm512_srli_epi32(_mm512_alignr_epi32(_mm512_setzero_si512(), b.val, 15), imm4*8)); }}; + template + struct _v_rotate_right { static inline v_int8x64 eval(const v_int8x64& a, const v_int8x64& b) + { return v_int8x64(_mm512_alignr_epi32(b.val, a.val, imm32)); }}; + template<> + struct _v_rotate_right { static inline v_int8x64 eval(const v_int8x64& a, const v_int8x64&) { return a; }}; + template + struct _v_rotate_right { static inline v_int8x64 eval(const v_int8x64&, const v_int8x64& b) + { return v_int8x64(_mm512_alignr_epi32(_mm512_setzero_si512(), b.val, imm32 - 16)); }}; + template<> + struct _v_rotate_right { static inline v_int8x64 eval(const v_int8x64&, const v_int8x64& b) { return b; }}; + template<> + struct _v_rotate_right { static inline v_int8x64 eval(const v_int8x64&, const v_int8x64&) { return v_int8x64(); }}; +} +template inline v_int8x64 v_rotate_right(const v_int8x64& a, const v_int8x64& b) +{ + return imm >= 128 ? v_int8x64() : +#if CV_AVX_512VBMI + v_int8x64(_mm512_permutex2var_epi8(a.val, + _v512_set_epu8(0x3f + imm, 0x3e + imm, 0x3d + imm, 0x3c + imm, 0x3b + imm, 0x3a + imm, 0x39 + imm, 0x38 + imm, + 0x37 + imm, 0x36 + imm, 0x35 + imm, 0x34 + imm, 0x33 + imm, 0x32 + imm, 0x31 + imm, 0x30 + imm, + 0x2f + imm, 0x2e + imm, 0x2d + imm, 0x2c + imm, 0x2b + imm, 0x2a + imm, 0x29 + imm, 0x28 + imm, + 0x27 + imm, 0x26 + imm, 0x25 + imm, 0x24 + imm, 0x23 + imm, 0x22 + imm, 0x21 + imm, 0x20 + imm, + 0x1f + imm, 0x1e + imm, 0x1d + imm, 0x1c + imm, 0x1b + imm, 0x1a + imm, 0x19 + imm, 0x18 + imm, + 0x17 + imm, 0x16 + imm, 0x15 + imm, 0x14 + imm, 0x13 + imm, 0x12 + imm, 0x11 + imm, 0x10 + imm, + 0x0f + imm, 0x0e + imm, 0x0d + imm, 0x0c + imm, 0x0b + imm, 0x0a + imm, 0x09 + imm, 0x08 + imm, + 0x07 + imm, 0x06 + imm, 0x05 + imm, 0x04 + imm, 0x03 + imm, 0x02 + imm, 0x01 + imm, 0x00 + imm), b.val)); +#else + _v_rotate_right 15), imm/4>::eval(a, b); +#endif +} +template +inline v_int8x64 v_rotate_left(const v_int8x64& a, const v_int8x64& b) +{ + if (imm == 0) return a; + if (imm == 64) return b; + if (imm >= 128) return v_int8x64(); +#if CV_AVX_512VBMI + return v_int8x64(_mm512_permutex2var_epi8(b.val, + _v512_set_epi8(0x7f - imm,0x7e - imm,0x7d - imm,0x7c - imm,0x7b - imm,0x7a - imm,0x79 - imm,0x78 - imm, + 0x77 - imm,0x76 - imm,0x75 - imm,0x74 - imm,0x73 - imm,0x72 - imm,0x71 - imm,0x70 - imm, + 0x6f - imm,0x6e - imm,0x6d - imm,0x6c - imm,0x6b - imm,0x6a - imm,0x69 - imm,0x68 - imm, + 0x67 - imm,0x66 - imm,0x65 - imm,0x64 - imm,0x63 - imm,0x62 - imm,0x61 - imm,0x60 - imm, + 0x5f - imm,0x5e - imm,0x5d - imm,0x5c - imm,0x5b - imm,0x5a - imm,0x59 - imm,0x58 - imm, + 0x57 - imm,0x56 - imm,0x55 - imm,0x54 - imm,0x53 - imm,0x52 - imm,0x51 - imm,0x50 - imm, + 0x4f - imm,0x4e - imm,0x4d - imm,0x4c - imm,0x4b - imm,0x4a - imm,0x49 - imm,0x48 - imm, + 0x47 - imm,0x46 - imm,0x45 - imm,0x44 - imm,0x43 - imm,0x42 - imm,0x41 - imm,0x40 - imm), a.val)); +#else + return imm < 64 ? v_rotate_right<64 - imm>(b, a) : v_rotate_right<128 - imm>(v512_setzero_s8(), b); +#endif +} +template +inline v_int8x64 v_rotate_right(const v_int8x64& a) +{ + if (imm == 0) return a; + if (imm >= 64) return v_int8x64(); +#if CV_AVX_512VBMI + return v_int8x64(_mm512_maskz_permutexvar_epi8(0xFFFFFFFFFFFFFFFF >> imm, + _v512_set_epu8(0x3f + imm,0x3e + imm,0x3d + imm,0x3c + imm,0x3b + imm,0x3a + imm,0x39 + imm,0x38 + imm, + 0x37 + imm,0x36 + imm,0x35 + imm,0x34 + imm,0x33 + imm,0x32 + imm,0x31 + imm,0x30 + imm, + 0x2f + imm,0x2e + imm,0x2d + imm,0x2c + imm,0x2b + imm,0x2a + imm,0x29 + imm,0x28 + imm, + 0x27 + imm,0x26 + imm,0x25 + imm,0x24 + imm,0x23 + imm,0x22 + imm,0x21 + imm,0x20 + imm, + 0x1f + imm,0x1e + imm,0x1d + imm,0x1c + imm,0x1b + imm,0x1a + imm,0x19 + imm,0x18 + imm, + 0x17 + imm,0x16 + imm,0x15 + imm,0x14 + imm,0x13 + imm,0x12 + imm,0x11 + imm,0x10 + imm, + 0x0f + imm,0x0e + imm,0x0d + imm,0x0c + imm,0x0b + imm,0x0a + imm,0x09 + imm,0x08 + imm, + 0x07 + imm,0x06 + imm,0x05 + imm,0x04 + imm,0x03 + imm,0x02 + imm,0x01 + imm,0x00 + imm), a.val)); +#else + return v_rotate_right(a, v512_setzero_s8()); +#endif +} +template +inline v_int8x64 v_rotate_left(const v_int8x64& a) +{ + if (imm == 0) return a; + if (imm >= 64) return v_int8x64(); +#if CV_AVX_512VBMI + return v_int8x64(_mm512_maskz_permutexvar_epi8(0xFFFFFFFFFFFFFFFF << imm, + _v512_set_epi8(0x3f - imm,0x3e - imm,0x3d - imm,0x3c - imm,0x3b - imm,0x3a - imm,0x39 - imm,0x38 - imm, + 0x37 - imm,0x36 - imm,0x35 - imm,0x34 - imm,0x33 - imm,0x32 - imm,0x31 - imm,0x30 - imm, + 0x2f - imm,0x2e - imm,0x2d - imm,0x2c - imm,0x2b - imm,0x2a - imm,0x29 - imm,0x28 - imm, + 0x27 - imm,0x26 - imm,0x25 - imm,0x24 - imm,0x23 - imm,0x22 - imm,0x21 - imm,0x20 - imm, + 0x1f - imm,0x1e - imm,0x1d - imm,0x1c - imm,0x1b - imm,0x1a - imm,0x19 - imm,0x18 - imm, + 0x17 - imm,0x16 - imm,0x15 - imm,0x14 - imm,0x13 - imm,0x12 - imm,0x11 - imm,0x10 - imm, + 0x0f - imm,0x0e - imm,0x0d - imm,0x0c - imm,0x0b - imm,0x0a - imm,0x09 - imm,0x08 - imm, + 0x07 - imm,0x06 - imm,0x05 - imm,0x04 - imm,0x03 - imm,0x02 - imm,0x01 - imm,0x00 - imm), a.val)); +#else + return v_rotate_right<64 - imm>(v512_setzero_s8(), a); +#endif +} + +#define OPENCV_HAL_IMPL_AVX512_ROTATE_PM(_Tpvec, suffix) \ +template inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \ +{ return v_reinterpret_as_##suffix(v_rotate_left(v_reinterpret_as_s8(a), v_reinterpret_as_s8(b))); } \ +template inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \ +{ return v_reinterpret_as_##suffix(v_rotate_right(v_reinterpret_as_s8(a), v_reinterpret_as_s8(b))); } \ +template inline _Tpvec v_rotate_left(const _Tpvec& a) \ +{ return v_reinterpret_as_##suffix(v_rotate_left(v_reinterpret_as_s8(a))); } \ +template inline _Tpvec v_rotate_right(const _Tpvec& a) \ +{ return v_reinterpret_as_##suffix(v_rotate_right(v_reinterpret_as_s8(a))); } + +#define OPENCV_HAL_IMPL_AVX512_ROTATE_EC(_Tpvec, suffix) \ +template \ +inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \ +{ \ + enum { SHIFT2 = (_Tpvec::nlanes - imm) }; \ + enum { MASK = ((1 << _Tpvec::nlanes) - 1) }; \ + if (imm == 0) return a; \ + if (imm == _Tpvec::nlanes) return b; \ + if (imm >= 2*_Tpvec::nlanes) return _Tpvec(); \ + return _Tpvec(_mm512_mask_expand_##suffix(_mm512_maskz_compress_##suffix((MASK << SHIFT2)&MASK, b.val), (MASK << (imm))&MASK, a.val)); \ +} \ +template \ +inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \ +{ \ + enum { SHIFT2 = (_Tpvec::nlanes - imm) }; \ + enum { MASK = ((1 << _Tpvec::nlanes) - 1) }; \ + if (imm == 0) return a; \ + if (imm == _Tpvec::nlanes) return b; \ + if (imm >= 2*_Tpvec::nlanes) return _Tpvec(); \ + return _Tpvec(_mm512_mask_expand_##suffix(_mm512_maskz_compress_##suffix((MASK << (imm))&MASK, a.val), (MASK << SHIFT2)&MASK, b.val)); \ +} \ +template \ +inline _Tpvec v_rotate_left(const _Tpvec& a) \ +{ \ + if (imm == 0) return a; \ + if (imm >= _Tpvec::nlanes) return _Tpvec(); \ + return _Tpvec(_mm512_maskz_expand_##suffix((1 << _Tpvec::nlanes) - (1 << (imm)), a.val)); \ +} \ +template \ +inline _Tpvec v_rotate_right(const _Tpvec& a) \ +{ \ + if (imm == 0) return a; \ + if (imm >= _Tpvec::nlanes) return _Tpvec(); \ + return _Tpvec(_mm512_maskz_compress_##suffix((1 << _Tpvec::nlanes) - (1 << (imm)), a.val)); \ +} + +OPENCV_HAL_IMPL_AVX512_ROTATE_PM(v_uint8x64, u8) +OPENCV_HAL_IMPL_AVX512_ROTATE_PM(v_uint16x32, u16) +OPENCV_HAL_IMPL_AVX512_ROTATE_PM(v_int16x32, s16) +OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_uint32x16, epi32) +OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_int32x16, epi32) +OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_uint64x8, epi64) +OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_int64x8, epi64) +OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_float32x16, ps) +OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_float64x8, pd) + +/** Reverse **/ +inline v_uint8x64 v_reverse(const v_uint8x64 &a) +{ +#if CV_AVX_512VBMI + static const __m512i perm = _mm512_set_epi32( + 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f, + 0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f, + 0x20212223, 0x24252627, 0x28292a2b, 0x2c2d2e2f, + 0x30313233, 0x34353637, 0x38393a3b, 0x3c3d3e3f); + return v_uint8x64(_mm512_permutexvar_epi8(perm, a.val)); +#else + static const __m512i shuf = _mm512_set_epi32( + 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f, + 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f, + 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f, + 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f); + static const __m512i perm = _mm512_set_epi64(1, 0, 3, 2, 5, 4, 7, 6); + __m512i vec = _mm512_shuffle_epi8(a.val, shuf); + return v_uint8x64(_mm512_permutexvar_epi64(perm, vec)); +#endif +} + +inline v_int8x64 v_reverse(const v_int8x64 &a) +{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); } + +inline v_uint16x32 v_reverse(const v_uint16x32 &a) +{ +#if CV_AVX_512VBMI + static const __m512i perm = _mm512_set_epi32( + 0x00000001, 0x00020003, 0x00040005, 0x00060007, + 0x00080009, 0x000a000b, 0x000c000d, 0x000e000f, + 0x00100011, 0x00120013, 0x00140015, 0x00160017, + 0x00180019, 0x001a001b, 0x001c001d, 0x001e001f); + return v_uint16x32(_mm512_permutexvar_epi16(perm, a.val)); +#else + static const __m512i shuf = _mm512_set_epi32( + 0x01000302, 0x05040706, 0x09080b0a, 0x0d0c0f0e, + 0x01000302, 0x05040706, 0x09080b0a, 0x0d0c0f0e, + 0x01000302, 0x05040706, 0x09080b0a, 0x0d0c0f0e, + 0x01000302, 0x05040706, 0x09080b0a, 0x0d0c0f0e); + static const __m512i perm = _mm512_set_epi64(1, 0, 3, 2, 5, 4, 7, 6); + __m512i vec = _mm512_shuffle_epi8(a.val, shuf); + return v_uint16x32(_mm512_permutexvar_epi64(perm, vec)); +#endif +} + +inline v_int16x32 v_reverse(const v_int16x32 &a) +{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } + +inline v_uint32x16 v_reverse(const v_uint32x16 &a) +{ + static const __m512i perm = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,14, 15); + return v_uint32x16(_mm512_permutexvar_epi32(perm, a.val)); +} + +inline v_int32x16 v_reverse(const v_int32x16 &a) +{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_float32x16 v_reverse(const v_float32x16 &a) +{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_uint64x8 v_reverse(const v_uint64x8 &a) +{ + static const __m512i perm = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + return v_uint64x8(_mm512_permutexvar_epi64(perm, a.val)); +} + +inline v_int64x8 v_reverse(const v_int64x8 &a) +{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } + +inline v_float64x8 v_reverse(const v_float64x8 &a) +{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } + +////////// Reduce ///////// + +/** Reduce **/ +#define OPENCV_HAL_IMPL_AVX512_REDUCE_ADD64(a, b) a + b +#define OPENCV_HAL_IMPL_AVX512_REDUCE_8(sctype, func, _Tpvec, ifunc, scop) \ + inline sctype v_reduce_##func(const _Tpvec& a) \ + { __m256i half = _mm256_##ifunc(_v512_extract_low(a.val), _v512_extract_high(a.val)); \ + sctype CV_DECL_ALIGNED(64) idx[2]; \ + _mm_store_si128((__m128i*)idx, _mm_##ifunc(_mm256_castsi256_si128(half), _mm256_extracti128_si256(half, 1))); \ + return scop(idx[0], idx[1]); } +OPENCV_HAL_IMPL_AVX512_REDUCE_8(uint64, min, v_uint64x8, min_epu64, min) +OPENCV_HAL_IMPL_AVX512_REDUCE_8(uint64, max, v_uint64x8, max_epu64, max) +OPENCV_HAL_IMPL_AVX512_REDUCE_8(uint64, sum, v_uint64x8, add_epi64, OPENCV_HAL_IMPL_AVX512_REDUCE_ADD64) +OPENCV_HAL_IMPL_AVX512_REDUCE_8(int64, min, v_int64x8, min_epi64, min) +OPENCV_HAL_IMPL_AVX512_REDUCE_8(int64, max, v_int64x8, max_epi64, max) +OPENCV_HAL_IMPL_AVX512_REDUCE_8(int64, sum, v_int64x8, add_epi64, OPENCV_HAL_IMPL_AVX512_REDUCE_ADD64) + +#define OPENCV_HAL_IMPL_AVX512_REDUCE_8F(func, ifunc, scop) \ + inline double v_reduce_##func(const v_float64x8& a) \ + { __m256d half = _mm256_##ifunc(_v512_extract_low(a.val), _v512_extract_high(a.val)); \ + double CV_DECL_ALIGNED(64) idx[2]; \ + _mm_store_pd(idx, _mm_##ifunc(_mm256_castpd256_pd128(half), _mm256_extractf128_pd(half, 1))); \ + return scop(idx[0], idx[1]); } +OPENCV_HAL_IMPL_AVX512_REDUCE_8F(min, min_pd, min) +OPENCV_HAL_IMPL_AVX512_REDUCE_8F(max, max_pd, max) +OPENCV_HAL_IMPL_AVX512_REDUCE_8F(sum, add_pd, OPENCV_HAL_IMPL_AVX512_REDUCE_ADD64) + +#define OPENCV_HAL_IMPL_AVX512_REDUCE_16(sctype, func, _Tpvec, ifunc) \ + inline sctype v_reduce_##func(const _Tpvec& a) \ + { __m256i half = _mm256_##ifunc(_v512_extract_low(a.val), _v512_extract_high(a.val)); \ + __m128i quarter = _mm_##ifunc(_mm256_castsi256_si128(half), _mm256_extracti128_si256(half, 1)); \ + quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 8)); \ + quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 4)); \ + return (sctype)_mm_cvtsi128_si32(quarter); } +OPENCV_HAL_IMPL_AVX512_REDUCE_16(uint, min, v_uint32x16, min_epu32) +OPENCV_HAL_IMPL_AVX512_REDUCE_16(uint, max, v_uint32x16, max_epu32) +OPENCV_HAL_IMPL_AVX512_REDUCE_16(int, min, v_int32x16, min_epi32) +OPENCV_HAL_IMPL_AVX512_REDUCE_16(int, max, v_int32x16, max_epi32) + +#define OPENCV_HAL_IMPL_AVX512_REDUCE_16F(func, ifunc) \ + inline float v_reduce_##func(const v_float32x16& a) \ + { __m256 half = _mm256_##ifunc(_v512_extract_low(a.val), _v512_extract_high(a.val)); \ + __m128 quarter = _mm_##ifunc(_mm256_castps256_ps128(half), _mm256_extractf128_ps(half, 1)); \ + quarter = _mm_##ifunc(quarter, _mm_permute_ps(quarter, _MM_SHUFFLE(0, 0, 3, 2))); \ + quarter = _mm_##ifunc(quarter, _mm_permute_ps(quarter, _MM_SHUFFLE(0, 0, 0, 1))); \ + return _mm_cvtss_f32(quarter); } +OPENCV_HAL_IMPL_AVX512_REDUCE_16F(min, min_ps) +OPENCV_HAL_IMPL_AVX512_REDUCE_16F(max, max_ps) + +inline float v_reduce_sum(const v_float32x16& a) +{ + __m256 half = _mm256_add_ps(_v512_extract_low(a.val), _v512_extract_high(a.val)); + __m128 quarter = _mm_add_ps(_mm256_castps256_ps128(half), _mm256_extractf128_ps(half, 1)); + quarter = _mm_hadd_ps(quarter, quarter); + return _mm_cvtss_f32(_mm_hadd_ps(quarter, quarter)); +} +inline int v_reduce_sum(const v_int32x16& a) +{ + __m256i half = _mm256_add_epi32(_v512_extract_low(a.val), _v512_extract_high(a.val)); + __m128i quarter = _mm_add_epi32(_mm256_castsi256_si128(half), _mm256_extracti128_si256(half, 1)); + quarter = _mm_hadd_epi32(quarter, quarter); + return _mm_cvtsi128_si32(_mm_hadd_epi32(quarter, quarter)); +} +inline uint v_reduce_sum(const v_uint32x16& a) +{ return (uint)v_reduce_sum(v_reinterpret_as_s32(a)); } + +#define OPENCV_HAL_IMPL_AVX512_REDUCE_32(sctype, func, _Tpvec, ifunc) \ + inline sctype v_reduce_##func(const _Tpvec& a) \ + { __m256i half = _mm256_##ifunc(_v512_extract_low(a.val), _v512_extract_high(a.val)); \ + __m128i quarter = _mm_##ifunc(_mm256_castsi256_si128(half), _mm256_extracti128_si256(half, 1)); \ + quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 8)); \ + quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 4)); \ + quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 2)); \ + return (sctype)_mm_cvtsi128_si32(quarter); } +OPENCV_HAL_IMPL_AVX512_REDUCE_32(ushort, min, v_uint16x32, min_epu16) +OPENCV_HAL_IMPL_AVX512_REDUCE_32(ushort, max, v_uint16x32, max_epu16) +OPENCV_HAL_IMPL_AVX512_REDUCE_32(short, min, v_int16x32, min_epi16) +OPENCV_HAL_IMPL_AVX512_REDUCE_32(short, max, v_int16x32, max_epi16) + +inline int v_reduce_sum(const v_int16x32& a) +{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); } +inline uint v_reduce_sum(const v_uint16x32& a) +{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); } + +#define OPENCV_HAL_IMPL_AVX512_REDUCE_64(sctype, func, _Tpvec, ifunc) \ + inline sctype v_reduce_##func(const _Tpvec& a) \ + { __m256i half = _mm256_##ifunc(_v512_extract_low(a.val), _v512_extract_high(a.val)); \ + __m128i quarter = _mm_##ifunc(_mm256_castsi256_si128(half), _mm256_extracti128_si256(half, 1)); \ + quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 8)); \ + quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 4)); \ + quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 2)); \ + quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 1)); \ + return (sctype)_mm_cvtsi128_si32(quarter); } +OPENCV_HAL_IMPL_AVX512_REDUCE_64(uchar, min, v_uint8x64, min_epu8) +OPENCV_HAL_IMPL_AVX512_REDUCE_64(uchar, max, v_uint8x64, max_epu8) +OPENCV_HAL_IMPL_AVX512_REDUCE_64(schar, min, v_int8x64, min_epi8) +OPENCV_HAL_IMPL_AVX512_REDUCE_64(schar, max, v_int8x64, max_epi8) + +#define OPENCV_HAL_IMPL_AVX512_REDUCE_64_SUM(sctype, _Tpvec, suffix) \ + inline sctype v_reduce_sum(const _Tpvec& a) \ + { __m512i a16 = _mm512_add_epi16(_mm512_cvt##suffix##_epi16(_v512_extract_low(a.val)), \ + _mm512_cvt##suffix##_epi16(_v512_extract_high(a.val))); \ + a16 = _mm512_cvtepi16_epi32(_mm256_add_epi16(_v512_extract_low(a16), _v512_extract_high(a16))); \ + __m256i a8 = _mm256_add_epi32(_v512_extract_low(a16), _v512_extract_high(a16)); \ + __m128i a4 = _mm_add_epi32(_mm256_castsi256_si128(a8), _mm256_extracti128_si256(a8, 1)); \ + a4 = _mm_hadd_epi32(a4, a4); \ + return (sctype)_mm_cvtsi128_si32(_mm_hadd_epi32(a4, a4)); } +OPENCV_HAL_IMPL_AVX512_REDUCE_64_SUM(uint, v_uint8x64, epu8) +OPENCV_HAL_IMPL_AVX512_REDUCE_64_SUM(int, v_int8x64, epi8) + +inline v_float32x16 v_reduce_sum4(const v_float32x16& a, const v_float32x16& b, + const v_float32x16& c, const v_float32x16& d) +{ + __m256 abl = _mm256_hadd_ps(_v512_extract_low(a.val), _v512_extract_low(b.val)); + __m256 abh = _mm256_hadd_ps(_v512_extract_high(a.val), _v512_extract_high(b.val)); + __m256 cdl = _mm256_hadd_ps(_v512_extract_low(c.val), _v512_extract_low(d.val)); + __m256 cdh = _mm256_hadd_ps(_v512_extract_high(c.val), _v512_extract_high(d.val)); + return v_float32x16(_v512_combine(_mm256_hadd_ps(abl, cdl), _mm256_hadd_ps(abh, cdh))); +} + +inline unsigned v_reduce_sad(const v_uint8x64& a, const v_uint8x64& b) +{ + __m512i val = _mm512_sad_epu8(a.val, b.val); + __m256i half = _mm256_add_epi32(_v512_extract_low(val), _v512_extract_high(val)); + __m128i quarter = _mm_add_epi32(_mm256_castsi256_si128(half), _mm256_extracti128_si256(half, 1)); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter))); +} +inline unsigned v_reduce_sad(const v_int8x64& a, const v_int8x64& b) +{ + __m512i val = _mm512_set1_epi8(-128); + val = _mm512_sad_epu8(_mm512_add_epi8(a.val, val), _mm512_add_epi8(b.val, val)); + __m256i half = _mm256_add_epi32(_v512_extract_low(val), _v512_extract_high(val)); + __m128i quarter = _mm_add_epi32(_mm256_castsi256_si128(half), _mm256_extracti128_si256(half, 1)); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter))); +} +inline unsigned v_reduce_sad(const v_uint16x32& a, const v_uint16x32& b) +{ return v_reduce_sum(v_add_wrap(a - b, b - a)); } +inline unsigned v_reduce_sad(const v_int16x32& a, const v_int16x32& b) +{ return v_reduce_sum(v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b)))); } +inline unsigned v_reduce_sad(const v_uint32x16& a, const v_uint32x16& b) +{ return v_reduce_sum(v_max(a, b) - v_min(a, b)); } +inline unsigned v_reduce_sad(const v_int32x16& a, const v_int32x16& b) +{ return v_reduce_sum(v_reinterpret_as_u32(v_max(a, b) - v_min(a, b))); } +inline float v_reduce_sad(const v_float32x16& a, const v_float32x16& b) +{ return v_reduce_sum((a - b) & v_float32x16(_mm512_castsi512_ps(_mm512_set1_epi32(0x7fffffff)))); } +inline double v_reduce_sad(const v_float64x8& a, const v_float64x8& b) +{ return v_reduce_sum((a - b) & v_float64x8(_mm512_castsi512_pd(_mm512_set1_epi64(0x7fffffffffffffff)))); } + +/** Popcount **/ +inline v_uint8x64 v_popcount(const v_int8x64& a) +{ +#if CV_AVX_512BITALG + return v_uint8x64(_mm512_popcnt_epi8(a.val)); +#elif CV_AVX_512VBMI + __m512i _popcnt_table0 = _v512_set_epu8(7, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3, + 5, 4, 4, 3, 4, 3, 3, 2, 4, 3, 3, 2, 3, 2, 2, 1, + 5, 4, 4, 3, 4, 3, 3, 2, 4, 3, 3, 2, 3, 2, 2, 1, + 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0); + __m512i _popcnt_table1 = _v512_set_epu8(7, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3, + 6, 5, 5, 4, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 3, 2, + 6, 5, 5, 4, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 3, 2, + 5, 4, 4, 3, 4, 3, 3, 2, 4, 3, 3, 2, 3, 2, 2, 1); + return v_uint8x64(_mm512_sub_epi8(_mm512_permutex2var_epi8(_popcnt_table0, a.val, _popcnt_table1), _mm512_movm_epi8(_mm512_movepi8_mask(a.val)))); +#else + __m512i _popcnt_table = _mm512_set4_epi32(0x04030302, 0x03020201, 0x03020201, 0x02010100); + __m512i _popcnt_mask = _mm512_set1_epi8(0x0F); + + return v_uint8x64(_mm512_add_epi8(_mm512_shuffle_epi8(_popcnt_table, _mm512_and_si512( a.val, _popcnt_mask)), + _mm512_shuffle_epi8(_popcnt_table, _mm512_and_si512(_mm512_srli_epi16(a.val, 4), _popcnt_mask)))); +#endif +} +inline v_uint16x32 v_popcount(const v_int16x32& a) +{ +#if CV_AVX_512BITALG + return v_uint16x32(_mm512_popcnt_epi16(a.val)); +#elif CV_AVX_512VPOPCNTDQ + __m512i zero = _mm512_setzero_si512(); + return v_uint16x32(_mm512_packs_epi32(_mm512_popcnt_epi32(_mm512_unpacklo_epi16(a.val, zero)), + _mm512_popcnt_epi32(_mm512_unpackhi_epi16(a.val, zero)))); +#else + v_uint8x64 p = v_popcount(v_reinterpret_as_s8(a)); + p += v_rotate_right<1>(p); + return v_reinterpret_as_u16(p) & v512_setall_u16(0x00ff); +#endif +} +inline v_uint32x16 v_popcount(const v_int32x16& a) +{ +#if CV_AVX_512VPOPCNTDQ + return v_uint32x16(_mm512_popcnt_epi32(a.val)); +#else + v_uint8x64 p = v_popcount(v_reinterpret_as_s8(a)); + p += v_rotate_right<1>(p); + p += v_rotate_right<2>(p); + return v_reinterpret_as_u32(p) & v512_setall_u32(0x000000ff); +#endif +} +inline v_uint64x8 v_popcount(const v_int64x8& a) +{ +#if CV_AVX_512VPOPCNTDQ + return v_uint64x8(_mm512_popcnt_epi64(a.val)); +#else + return v_uint64x8(_mm512_sad_epu8(v_popcount(v_reinterpret_as_s8(a)).val, _mm512_setzero_si512())); +#endif +} + + +inline v_uint8x64 v_popcount(const v_uint8x64& a) { return v_popcount(v_reinterpret_as_s8 (a)); } +inline v_uint16x32 v_popcount(const v_uint16x32& a) { return v_popcount(v_reinterpret_as_s16(a)); } +inline v_uint32x16 v_popcount(const v_uint32x16& a) { return v_popcount(v_reinterpret_as_s32(a)); } +inline v_uint64x8 v_popcount(const v_uint64x8& a) { return v_popcount(v_reinterpret_as_s64(a)); } + + +////////// Other math ///////// + +/** Some frequent operations **/ +#define OPENCV_HAL_IMPL_AVX512_MULADD(_Tpvec, suffix) \ + inline _Tpvec v_fma(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \ + { return _Tpvec(_mm512_fmadd_##suffix(a.val, b.val, c.val)); } \ + inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \ + { return _Tpvec(_mm512_fmadd_##suffix(a.val, b.val, c.val)); } \ + inline _Tpvec v_sqrt(const _Tpvec& x) \ + { return _Tpvec(_mm512_sqrt_##suffix(x.val)); } \ + inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b) \ + { return v_fma(a, a, b * b); } \ + inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b) \ + { return v_sqrt(v_fma(a, a, b * b)); } + +OPENCV_HAL_IMPL_AVX512_MULADD(v_float32x16, ps) +OPENCV_HAL_IMPL_AVX512_MULADD(v_float64x8, pd) + +inline v_int32x16 v_fma(const v_int32x16& a, const v_int32x16& b, const v_int32x16& c) +{ return a * b + c; } +inline v_int32x16 v_muladd(const v_int32x16& a, const v_int32x16& b, const v_int32x16& c) +{ return v_fma(a, b, c); } + +inline v_float32x16 v_invsqrt(const v_float32x16& x) +{ +#if CV_AVX_512ER + return v_float32x16(_mm512_rsqrt28_ps(x.val)); +#else + v_float32x16 half = x * v512_setall_f32(0.5); + v_float32x16 t = v_float32x16(_mm512_rsqrt14_ps(x.val)); + t *= v512_setall_f32(1.5) - ((t * t) * half); + return t; +#endif +} + +inline v_float64x8 v_invsqrt(const v_float64x8& x) +{ +#if CV_AVX_512ER + return v_float64x8(_mm512_rsqrt28_pd(x.val)); +#else + return v512_setall_f64(1.) / v_sqrt(x); +// v_float64x8 half = x * v512_setall_f64(0.5); +// v_float64x8 t = v_float64x8(_mm512_rsqrt14_pd(x.val)); +// t *= v512_setall_f64(1.5) - ((t * t) * half); +// t *= v512_setall_f64(1.5) - ((t * t) * half); +// return t; +#endif +} + +/** Absolute values **/ +#define OPENCV_HAL_IMPL_AVX512_ABS(_Tpvec, _Tpuvec, suffix) \ + inline _Tpuvec v_abs(const _Tpvec& x) \ + { return _Tpuvec(_mm512_abs_##suffix(x.val)); } + +OPENCV_HAL_IMPL_AVX512_ABS(v_int8x64, v_uint8x64, epi8) +OPENCV_HAL_IMPL_AVX512_ABS(v_int16x32, v_uint16x32, epi16) +OPENCV_HAL_IMPL_AVX512_ABS(v_int32x16, v_uint32x16, epi32) +OPENCV_HAL_IMPL_AVX512_ABS(v_int64x8, v_uint64x8, epi64) + +inline v_float32x16 v_abs(const v_float32x16& x) +{ +#ifdef _mm512_abs_pd + return v_float32x16(_mm512_abs_ps(x.val)); +#else + return v_float32x16(_mm512_castsi512_ps(_mm512_and_si512(_mm512_castps_si512(x.val), + _v512_set_epu64(0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF, + 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF)))); +#endif +} + +inline v_float64x8 v_abs(const v_float64x8& x) +{ +#ifdef _mm512_abs_pd + #if defined __GNUC__ && (__GNUC__ < 7 || (__GNUC__ == 7 && __GNUC_MINOR__ <= 3) || (__GNUC__ == 8 && __GNUC_MINOR__ <= 2)) + // Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87476 + return v_float64x8(_mm512_abs_pd(_mm512_castpd_ps(x.val))); + #else + return v_float64x8(_mm512_abs_pd(x.val)); + #endif +#else + return v_float64x8(_mm512_castsi512_pd(_mm512_and_si512(_mm512_castpd_si512(x.val), + _v512_set_epu64(0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF, + 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF)))); +#endif +} + +/** Absolute difference **/ +inline v_uint8x64 v_absdiff(const v_uint8x64& a, const v_uint8x64& b) +{ return v_add_wrap(a - b, b - a); } +inline v_uint16x32 v_absdiff(const v_uint16x32& a, const v_uint16x32& b) +{ return v_add_wrap(a - b, b - a); } +inline v_uint32x16 v_absdiff(const v_uint32x16& a, const v_uint32x16& b) +{ return v_max(a, b) - v_min(a, b); } + +inline v_uint8x64 v_absdiff(const v_int8x64& a, const v_int8x64& b) +{ + v_int8x64 d = v_sub_wrap(a, b); + v_int8x64 m = a < b; + return v_reinterpret_as_u8(v_sub_wrap(d ^ m, m)); +} + +inline v_uint16x32 v_absdiff(const v_int16x32& a, const v_int16x32& b) +{ return v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b))); } + +inline v_uint32x16 v_absdiff(const v_int32x16& a, const v_int32x16& b) +{ + v_int32x16 d = a - b; + v_int32x16 m = a < b; + return v_reinterpret_as_u32((d ^ m) - m); +} + +inline v_float32x16 v_absdiff(const v_float32x16& a, const v_float32x16& b) +{ return v_abs(a - b); } + +inline v_float64x8 v_absdiff(const v_float64x8& a, const v_float64x8& b) +{ return v_abs(a - b); } + +/** Saturating absolute difference **/ +inline v_int8x64 v_absdiffs(const v_int8x64& a, const v_int8x64& b) +{ + v_int8x64 d = a - b; + v_int8x64 m = a < b; + return (d ^ m) - m; +} +inline v_int16x32 v_absdiffs(const v_int16x32& a, const v_int16x32& b) +{ return v_max(a, b) - v_min(a, b); } + +////////// Conversions ///////// + +/** Rounding **/ +inline v_int32x16 v_round(const v_float32x16& a) +{ return v_int32x16(_mm512_cvtps_epi32(a.val)); } + +inline v_int32x16 v_round(const v_float64x8& a) +{ return v_int32x16(_mm512_castsi256_si512(_mm512_cvtpd_epi32(a.val))); } + +inline v_int32x16 v_round(const v_float64x8& a, const v_float64x8& b) +{ return v_int32x16(_v512_combine(_mm512_cvtpd_epi32(a.val), _mm512_cvtpd_epi32(b.val))); } + +inline v_int32x16 v_trunc(const v_float32x16& a) +{ return v_int32x16(_mm512_cvttps_epi32(a.val)); } + +inline v_int32x16 v_trunc(const v_float64x8& a) +{ return v_int32x16(_mm512_castsi256_si512(_mm512_cvttpd_epi32(a.val))); } + +#if CVT_ROUND_MODES_IMPLEMENTED +inline v_int32x16 v_floor(const v_float32x16& a) +{ return v_int32x16(_mm512_cvt_roundps_epi32(a.val, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)); } + +inline v_int32x16 v_floor(const v_float64x8& a) +{ return v_int32x16(_mm512_castsi256_si512(_mm512_cvt_roundpd_epi32(a.val, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC))); } + +inline v_int32x16 v_ceil(const v_float32x16& a) +{ return v_int32x16(_mm512_cvt_roundps_epi32(a.val, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)); } + +inline v_int32x16 v_ceil(const v_float64x8& a) +{ return v_int32x16(_mm512_castsi256_si512(_mm512_cvt_roundpd_epi32(a.val, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC))); } +#else +inline v_int32x16 v_floor(const v_float32x16& a) +{ return v_int32x16(_mm512_cvtps_epi32(_mm512_roundscale_ps(a.val, 1))); } + +inline v_int32x16 v_floor(const v_float64x8& a) +{ return v_int32x16(_mm512_castsi256_si512(_mm512_cvtpd_epi32(_mm512_roundscale_pd(a.val, 1)))); } + +inline v_int32x16 v_ceil(const v_float32x16& a) +{ return v_int32x16(_mm512_cvtps_epi32(_mm512_roundscale_ps(a.val, 2))); } + +inline v_int32x16 v_ceil(const v_float64x8& a) +{ return v_int32x16(_mm512_castsi256_si512(_mm512_cvtpd_epi32(_mm512_roundscale_pd(a.val, 2)))); } +#endif + +/** To float **/ +inline v_float32x16 v_cvt_f32(const v_int32x16& a) +{ return v_float32x16(_mm512_cvtepi32_ps(a.val)); } + +inline v_float32x16 v_cvt_f32(const v_float64x8& a) +{ return v_float32x16(_mm512_cvtpd_pslo(a.val)); } + +inline v_float32x16 v_cvt_f32(const v_float64x8& a, const v_float64x8& b) +{ return v_float32x16(_v512_combine(_mm512_cvtpd_ps(a.val), _mm512_cvtpd_ps(b.val))); } + +inline v_float64x8 v_cvt_f64(const v_int32x16& a) +{ return v_float64x8(_mm512_cvtepi32_pd(_v512_extract_low(a.val))); } + +inline v_float64x8 v_cvt_f64_high(const v_int32x16& a) +{ return v_float64x8(_mm512_cvtepi32_pd(_v512_extract_high(a.val))); } + +inline v_float64x8 v_cvt_f64(const v_float32x16& a) +{ return v_float64x8(_mm512_cvtps_pd(_v512_extract_low(a.val))); } + +inline v_float64x8 v_cvt_f64_high(const v_float32x16& a) +{ return v_float64x8(_mm512_cvtps_pd(_v512_extract_high(a.val))); } + +// from (Mysticial and wim) https://stackoverflow.com/q/41144668 +inline v_float64x8 v_cvt_f64(const v_int64x8& v) +{ +#if CV_AVX_512DQ + return v_float64x8(_mm512_cvtepi64_pd(v.val)); +#else + // constants encoded as floating-point + __m512i magic_i_lo = _mm512_set1_epi64(0x4330000000000000); // 2^52 + __m512i magic_i_hi32 = _mm512_set1_epi64(0x4530000080000000); // 2^84 + 2^63 + __m512i magic_i_all = _mm512_set1_epi64(0x4530000080100000); // 2^84 + 2^63 + 2^52 + __m512d magic_d_all = _mm512_castsi512_pd(magic_i_all); + + // Blend the 32 lowest significant bits of v with magic_int_lo + __m512i v_lo = _mm512_mask_blend_epi32(0x5555, magic_i_lo, v.val); + // Extract the 32 most significant bits of v + __m512i v_hi = _mm512_srli_epi64(v.val, 32); + // Flip the msb of v_hi and blend with 0x45300000 + v_hi = _mm512_xor_si512(v_hi, magic_i_hi32); + // Compute in double precision + __m512d v_hi_dbl = _mm512_sub_pd(_mm512_castsi512_pd(v_hi), magic_d_all); + // (v_hi - magic_d_all) + v_lo Do not assume associativity of floating point addition + __m512d result = _mm512_add_pd(v_hi_dbl, _mm512_castsi512_pd(v_lo)); + return v_float64x8(result); +#endif +} + +////////////// Lookup table access //////////////////// + +inline v_int8x64 v512_lut(const schar* tab, const int* idx) +{ + __m128i p0 = _mm512_cvtepi32_epi8(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx ), (const int *)tab, 1)); + __m128i p1 = _mm512_cvtepi32_epi8(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx + 1), (const int *)tab, 1)); + __m128i p2 = _mm512_cvtepi32_epi8(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx + 2), (const int *)tab, 1)); + __m128i p3 = _mm512_cvtepi32_epi8(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx + 3), (const int *)tab, 1)); + return v_int8x64(_mm512_inserti32x4(_mm512_inserti32x4(_mm512_inserti32x4(_mm512_castsi128_si512(p0), p1, 1), p2, 2), p3, 3)); +} +inline v_int8x64 v512_lut_pairs(const schar* tab, const int* idx) +{ + __m256i p0 = _mm512_cvtepi32_epi16(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx ), (const int *)tab, 1)); + __m256i p1 = _mm512_cvtepi32_epi16(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx + 1), (const int *)tab, 1)); + return v_int8x64(_v512_combine(p0, p1)); +} +inline v_int8x64 v512_lut_quads(const schar* tab, const int* idx) +{ + return v_int8x64(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx), (const int *)tab, 1)); +} +inline v_uint8x64 v512_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v512_lut((const schar *)tab, idx)); } +inline v_uint8x64 v512_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v512_lut_pairs((const schar *)tab, idx)); } +inline v_uint8x64 v512_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v512_lut_quads((const schar *)tab, idx)); } + +inline v_int16x32 v512_lut(const short* tab, const int* idx) +{ + __m256i p0 = _mm512_cvtepi32_epi16(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx ), (const int *)tab, 2)); + __m256i p1 = _mm512_cvtepi32_epi16(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx + 1), (const int *)tab, 2)); + return v_int16x32(_v512_combine(p0, p1)); +} +inline v_int16x32 v512_lut_pairs(const short* tab, const int* idx) +{ + return v_int16x32(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx), (const int *)tab, 2)); +} +inline v_int16x32 v512_lut_quads(const short* tab, const int* idx) +{ +#if defined(__GNUC__) + return v_int16x32(_mm512_i32gather_epi64(_mm256_loadu_si256((const __m256i*)idx), (const long long int*)tab, 2)); +#else + return v_int16x32(_mm512_i32gather_epi64(_mm256_loadu_si256((const __m256i*)idx), (const int64*)tab, 2)); +#endif +} +inline v_uint16x32 v512_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v512_lut((const short *)tab, idx)); } +inline v_uint16x32 v512_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v512_lut_pairs((const short *)tab, idx)); } +inline v_uint16x32 v512_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v512_lut_quads((const short *)tab, idx)); } + +inline v_int32x16 v512_lut(const int* tab, const int* idx) +{ + return v_int32x16(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx), tab, 4)); +} +inline v_int32x16 v512_lut_pairs(const int* tab, const int* idx) +{ +#if defined(__GNUC__) + return v_int32x16(_mm512_i32gather_epi64(_mm256_loadu_si256((const __m256i*)idx), (const long long int*)tab, 4)); +#else + return v_int32x16(_mm512_i32gather_epi64(_mm256_loadu_si256((const __m256i*)idx), (const int64*)tab, 4)); +#endif +} +inline v_int32x16 v512_lut_quads(const int* tab, const int* idx) +{ + return v_int32x16(_mm512_inserti32x4(_mm512_inserti32x4(_mm512_inserti32x4(_mm512_castsi128_si512( + _mm_loadu_si128((const __m128i*)(tab + idx[0]))), + _mm_loadu_si128((const __m128i*)(tab + idx[1])), 1), + _mm_loadu_si128((const __m128i*)(tab + idx[2])), 2), + _mm_loadu_si128((const __m128i*)(tab + idx[3])), 3)); +} +inline v_uint32x16 v512_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v512_lut((const int *)tab, idx)); } +inline v_uint32x16 v512_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v512_lut_pairs((const int *)tab, idx)); } +inline v_uint32x16 v512_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v512_lut_quads((const int *)tab, idx)); } + +inline v_int64x8 v512_lut(const int64* tab, const int* idx) +{ +#if defined(__GNUC__) + return v_int64x8(_mm512_i32gather_epi64(_mm256_loadu_si256((const __m256i*)idx), (const long long int*)tab, 8)); +#else + return v_int64x8(_mm512_i32gather_epi64(_mm256_loadu_si256((const __m256i*)idx), tab , 8)); +#endif +} +inline v_int64x8 v512_lut_pairs(const int64* tab, const int* idx) +{ + return v_int64x8(_mm512_inserti32x4(_mm512_inserti32x4(_mm512_inserti32x4(_mm512_castsi128_si512( + _mm_loadu_si128((const __m128i*)(tab + idx[0]))), + _mm_loadu_si128((const __m128i*)(tab + idx[1])), 1), + _mm_loadu_si128((const __m128i*)(tab + idx[2])), 2), + _mm_loadu_si128((const __m128i*)(tab + idx[3])), 3)); +} +inline v_uint64x8 v512_lut(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v512_lut((const int64 *)tab, idx)); } +inline v_uint64x8 v512_lut_pairs(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v512_lut_pairs((const int64 *)tab, idx)); } + +inline v_float32x16 v512_lut(const float* tab, const int* idx) +{ + return v_float32x16(_mm512_i32gather_ps(_mm512_loadu_si512((const __m512i*)idx), tab, 4)); +} +inline v_float32x16 v512_lut_pairs(const float* tab, const int* idx) { return v_reinterpret_as_f32(v512_lut_pairs((const int *)tab, idx)); } +inline v_float32x16 v512_lut_quads(const float* tab, const int* idx) { return v_reinterpret_as_f32(v512_lut_quads((const int *)tab, idx)); } + +inline v_float64x8 v512_lut(const double* tab, const int* idx) +{ + return v_float64x8(_mm512_i32gather_pd(_mm256_loadu_si256((const __m256i*)idx), tab, 8)); +} +inline v_float64x8 v512_lut_pairs(const double* tab, const int* idx) +{ + return v_float64x8(_mm512_insertf64x2(_mm512_insertf64x2(_mm512_insertf64x2(_mm512_castpd128_pd512( + _mm_loadu_pd(tab + idx[0])), + _mm_loadu_pd(tab + idx[1]), 1), + _mm_loadu_pd(tab + idx[2]), 2), + _mm_loadu_pd(tab + idx[3]), 3)); +} + +inline v_int32x16 v_lut(const int* tab, const v_int32x16& idxvec) +{ + return v_int32x16(_mm512_i32gather_epi32(idxvec.val, tab, 4)); +} + +inline v_uint32x16 v_lut(const unsigned* tab, const v_int32x16& idxvec) +{ + return v_reinterpret_as_u32(v_lut((const int *)tab, idxvec)); +} + +inline v_float32x16 v_lut(const float* tab, const v_int32x16& idxvec) +{ + return v_float32x16(_mm512_i32gather_ps(idxvec.val, tab, 4)); +} + +inline v_float64x8 v_lut(const double* tab, const v_int32x16& idxvec) +{ + return v_float64x8(_mm512_i32gather_pd(_v512_extract_low(idxvec.val), tab, 8)); +} + +inline void v_lut_deinterleave(const float* tab, const v_int32x16& idxvec, v_float32x16& x, v_float32x16& y) +{ + x.val = _mm512_i32gather_ps(idxvec.val, tab, 4); + y.val = _mm512_i32gather_ps(idxvec.val, &tab[1], 4); +} + +inline void v_lut_deinterleave(const double* tab, const v_int32x16& idxvec, v_float64x8& x, v_float64x8& y) +{ + x.val = _mm512_i32gather_pd(_v512_extract_low(idxvec.val), tab, 8); + y.val = _mm512_i32gather_pd(_v512_extract_low(idxvec.val), &tab[1], 8); +} + +inline v_int8x64 v_interleave_pairs(const v_int8x64& vec) +{ + return v_int8x64(_mm512_shuffle_epi8(vec.val, _mm512_set4_epi32(0x0f0d0e0c, 0x0b090a08, 0x07050604, 0x03010200))); +} +inline v_uint8x64 v_interleave_pairs(const v_uint8x64& vec) { return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); } +inline v_int8x64 v_interleave_quads(const v_int8x64& vec) +{ + return v_int8x64(_mm512_shuffle_epi8(vec.val, _mm512_set4_epi32(0x0f0b0e0a, 0x0d090c08, 0x07030602, 0x05010400))); +} +inline v_uint8x64 v_interleave_quads(const v_uint8x64& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); } + +inline v_int16x32 v_interleave_pairs(const v_int16x32& vec) +{ + return v_int16x32(_mm512_shuffle_epi8(vec.val, _mm512_set4_epi32(0x0f0e0b0a, 0x0d0c0908, 0x07060302, 0x05040100))); +} +inline v_uint16x32 v_interleave_pairs(const v_uint16x32& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); } +inline v_int16x32 v_interleave_quads(const v_int16x32& vec) +{ + return v_int16x32(_mm512_shuffle_epi8(vec.val, _mm512_set4_epi32(0x0f0e0706, 0x0d0c0504, 0x0b0a0302, 0x09080100))); +} +inline v_uint16x32 v_interleave_quads(const v_uint16x32& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); } + +inline v_int32x16 v_interleave_pairs(const v_int32x16& vec) +{ + return v_int32x16(_mm512_shuffle_epi32(vec.val, _MM_PERM_ACBD)); +} +inline v_uint32x16 v_interleave_pairs(const v_uint32x16& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } +inline v_float32x16 v_interleave_pairs(const v_float32x16& vec) { return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } + +inline v_int8x64 v_pack_triplets(const v_int8x64& vec) +{ + return v_int8x64(_mm512_permutexvar_epi32(_v512_set_epu64(0x0000000f0000000f, 0x0000000f0000000f, 0x0000000e0000000d, 0x0000000c0000000a, + 0x0000000900000008, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000), + _mm512_shuffle_epi8(vec.val, _mm512_set4_epi32(0xffffff0f, 0x0e0d0c0a, 0x09080605, 0x04020100)))); +} +inline v_uint8x64 v_pack_triplets(const v_uint8x64& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); } + +inline v_int16x32 v_pack_triplets(const v_int16x32& vec) +{ + return v_int16x32(_mm512_permutexvar_epi16(_v512_set_epu64(0x001f001f001f001f, 0x001f001f001f001f, 0x001e001d001c001a, 0x0019001800160015, + 0x0014001200110010, 0x000e000d000c000a, 0x0009000800060005, 0x0004000200010000), vec.val)); +} +inline v_uint16x32 v_pack_triplets(const v_uint16x32& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); } + +inline v_int32x16 v_pack_triplets(const v_int32x16& vec) +{ + return v_int32x16(_mm512_permutexvar_epi32(_v512_set_epu64(0x0000000f0000000f, 0x0000000f0000000f, 0x0000000e0000000d, 0x0000000c0000000a, + 0x0000000900000008, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000), vec.val)); +} +inline v_uint32x16 v_pack_triplets(const v_uint32x16& vec) { return v_reinterpret_as_u32(v_pack_triplets(v_reinterpret_as_s32(vec))); } +inline v_float32x16 v_pack_triplets(const v_float32x16& vec) +{ + return v_float32x16(_mm512_permutexvar_ps(_v512_set_epu64(0x0000000f0000000f, 0x0000000f0000000f, 0x0000000e0000000d, 0x0000000c0000000a, + 0x0000000900000008, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000), vec.val)); +} + +////////// Matrix operations ///////// + +//////// Dot Product //////// + +// 16 >> 32 +inline v_int32x16 v_dotprod(const v_int16x32& a, const v_int16x32& b) +{ return v_int32x16(_mm512_madd_epi16(a.val, b.val)); } +inline v_int32x16 v_dotprod(const v_int16x32& a, const v_int16x32& b, const v_int32x16& c) +{ return v_dotprod(a, b) + c; } + +// 32 >> 64 +inline v_int64x8 v_dotprod(const v_int32x16& a, const v_int32x16& b) +{ + __m512i even = _mm512_mul_epi32(a.val, b.val); + __m512i odd = _mm512_mul_epi32(_mm512_srli_epi64(a.val, 32), _mm512_srli_epi64(b.val, 32)); + return v_int64x8(_mm512_add_epi64(even, odd)); +} +inline v_int64x8 v_dotprod(const v_int32x16& a, const v_int32x16& b, const v_int64x8& c) +{ return v_dotprod(a, b) + c; } + +// 8 >> 32 +inline v_uint32x16 v_dotprod_expand(const v_uint8x64& a, const v_uint8x64& b) +{ + __m512i even_a = _mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, a.val, _mm512_setzero_si512()); + __m512i odd_a = _mm512_srli_epi16(a.val, 8); + + __m512i even_b = _mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, b.val, _mm512_setzero_si512()); + __m512i odd_b = _mm512_srli_epi16(b.val, 8); + + __m512i prod0 = _mm512_madd_epi16(even_a, even_b); + __m512i prod1 = _mm512_madd_epi16(odd_a, odd_b); + return v_uint32x16(_mm512_add_epi32(prod0, prod1)); +} +inline v_uint32x16 v_dotprod_expand(const v_uint8x64& a, const v_uint8x64& b, const v_uint32x16& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int32x16 v_dotprod_expand(const v_int8x64& a, const v_int8x64& b) +{ + __m512i even_a = _mm512_srai_epi16(_mm512_bslli_epi128(a.val, 1), 8); + __m512i odd_a = _mm512_srai_epi16(a.val, 8); + + __m512i even_b = _mm512_srai_epi16(_mm512_bslli_epi128(b.val, 1), 8); + __m512i odd_b = _mm512_srai_epi16(b.val, 8); + + __m512i prod0 = _mm512_madd_epi16(even_a, even_b); + __m512i prod1 = _mm512_madd_epi16(odd_a, odd_b); + return v_int32x16(_mm512_add_epi32(prod0, prod1)); +} +inline v_int32x16 v_dotprod_expand(const v_int8x64& a, const v_int8x64& b, const v_int32x16& c) +{ return v_dotprod_expand(a, b) + c; } + +// 16 >> 64 +inline v_uint64x8 v_dotprod_expand(const v_uint16x32& a, const v_uint16x32& b) +{ + __m512i mullo = _mm512_mullo_epi16(a.val, b.val); + __m512i mulhi = _mm512_mulhi_epu16(a.val, b.val); + __m512i mul0 = _mm512_unpacklo_epi16(mullo, mulhi); + __m512i mul1 = _mm512_unpackhi_epi16(mullo, mulhi); + + __m512i p02 = _mm512_mask_blend_epi32(0xAAAA, mul0, _mm512_setzero_si512()); + __m512i p13 = _mm512_srli_epi64(mul0, 32); + __m512i p46 = _mm512_mask_blend_epi32(0xAAAA, mul1, _mm512_setzero_si512()); + __m512i p57 = _mm512_srli_epi64(mul1, 32); + + __m512i p15_ = _mm512_add_epi64(p02, p13); + __m512i p9d_ = _mm512_add_epi64(p46, p57); + + return v_uint64x8(_mm512_add_epi64( + _mm512_unpacklo_epi64(p15_, p9d_), + _mm512_unpackhi_epi64(p15_, p9d_) + )); +} +inline v_uint64x8 v_dotprod_expand(const v_uint16x32& a, const v_uint16x32& b, const v_uint64x8& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int64x8 v_dotprod_expand(const v_int16x32& a, const v_int16x32& b) +{ + __m512i prod = _mm512_madd_epi16(a.val, b.val); + __m512i even = _mm512_srai_epi64(_mm512_bslli_epi128(prod, 4), 32); + __m512i odd = _mm512_srai_epi64(prod, 32); + return v_int64x8(_mm512_add_epi64(even, odd)); +} +inline v_int64x8 v_dotprod_expand(const v_int16x32& a, const v_int16x32& b, const v_int64x8& c) +{ return v_dotprod_expand(a, b) + c; } + +// 32 >> 64f +inline v_float64x8 v_dotprod_expand(const v_int32x16& a, const v_int32x16& b) +{ return v_cvt_f64(v_dotprod(a, b)); } +inline v_float64x8 v_dotprod_expand(const v_int32x16& a, const v_int32x16& b, const v_float64x8& c) +{ return v_dotprod_expand(a, b) + c; } + +//////// Fast Dot Product //////// + +// 16 >> 32 +inline v_int32x16 v_dotprod_fast(const v_int16x32& a, const v_int16x32& b) +{ return v_dotprod(a, b); } +inline v_int32x16 v_dotprod_fast(const v_int16x32& a, const v_int16x32& b, const v_int32x16& c) +{ return v_dotprod(a, b, c); } + +// 32 >> 64 +inline v_int64x8 v_dotprod_fast(const v_int32x16& a, const v_int32x16& b) +{ return v_dotprod(a, b); } +inline v_int64x8 v_dotprod_fast(const v_int32x16& a, const v_int32x16& b, const v_int64x8& c) +{ return v_dotprod(a, b, c); } + +// 8 >> 32 +inline v_uint32x16 v_dotprod_expand_fast(const v_uint8x64& a, const v_uint8x64& b) +{ return v_dotprod_expand(a, b); } +inline v_uint32x16 v_dotprod_expand_fast(const v_uint8x64& a, const v_uint8x64& b, const v_uint32x16& c) +{ return v_dotprod_expand(a, b, c); } + +inline v_int32x16 v_dotprod_expand_fast(const v_int8x64& a, const v_int8x64& b) +{ return v_dotprod_expand(a, b); } +inline v_int32x16 v_dotprod_expand_fast(const v_int8x64& a, const v_int8x64& b, const v_int32x16& c) +{ return v_dotprod_expand(a, b, c); } + +// 16 >> 64 +inline v_uint64x8 v_dotprod_expand_fast(const v_uint16x32& a, const v_uint16x32& b) +{ + __m512i mullo = _mm512_mullo_epi16(a.val, b.val); + __m512i mulhi = _mm512_mulhi_epu16(a.val, b.val); + __m512i mul0 = _mm512_unpacklo_epi16(mullo, mulhi); + __m512i mul1 = _mm512_unpackhi_epi16(mullo, mulhi); + + __m512i p02 = _mm512_mask_blend_epi32(0xAAAA, mul0, _mm512_setzero_si512()); + __m512i p13 = _mm512_srli_epi64(mul0, 32); + __m512i p46 = _mm512_mask_blend_epi32(0xAAAA, mul1, _mm512_setzero_si512()); + __m512i p57 = _mm512_srli_epi64(mul1, 32); + + __m512i p15_ = _mm512_add_epi64(p02, p13); + __m512i p9d_ = _mm512_add_epi64(p46, p57); + return v_uint64x8(_mm512_add_epi64(p15_, p9d_)); +} +inline v_uint64x8 v_dotprod_expand_fast(const v_uint16x32& a, const v_uint16x32& b, const v_uint64x8& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +inline v_int64x8 v_dotprod_expand_fast(const v_int16x32& a, const v_int16x32& b) +{ return v_dotprod_expand(a, b); } +inline v_int64x8 v_dotprod_expand_fast(const v_int16x32& a, const v_int16x32& b, const v_int64x8& c) +{ return v_dotprod_expand(a, b, c); } + +// 32 >> 64f +inline v_float64x8 v_dotprod_expand_fast(const v_int32x16& a, const v_int32x16& b) +{ return v_dotprod_expand(a, b); } +inline v_float64x8 v_dotprod_expand_fast(const v_int32x16& a, const v_int32x16& b, const v_float64x8& c) +{ return v_dotprod_expand(a, b) + c; } + + +#define OPENCV_HAL_AVX512_SPLAT2_PS(a, im) \ + v_float32x16(_mm512_permute_ps(a.val, _MM_SHUFFLE(im, im, im, im))) + +inline v_float32x16 v_matmul(const v_float32x16& v, + const v_float32x16& m0, const v_float32x16& m1, + const v_float32x16& m2, const v_float32x16& m3) +{ + v_float32x16 v04 = OPENCV_HAL_AVX512_SPLAT2_PS(v, 0); + v_float32x16 v15 = OPENCV_HAL_AVX512_SPLAT2_PS(v, 1); + v_float32x16 v26 = OPENCV_HAL_AVX512_SPLAT2_PS(v, 2); + v_float32x16 v37 = OPENCV_HAL_AVX512_SPLAT2_PS(v, 3); + return v_fma(v04, m0, v_fma(v15, m1, v_fma(v26, m2, v37 * m3))); +} + +inline v_float32x16 v_matmuladd(const v_float32x16& v, + const v_float32x16& m0, const v_float32x16& m1, + const v_float32x16& m2, const v_float32x16& a) +{ + v_float32x16 v04 = OPENCV_HAL_AVX512_SPLAT2_PS(v, 0); + v_float32x16 v15 = OPENCV_HAL_AVX512_SPLAT2_PS(v, 1); + v_float32x16 v26 = OPENCV_HAL_AVX512_SPLAT2_PS(v, 2); + return v_fma(v04, m0, v_fma(v15, m1, v_fma(v26, m2, a))); +} + +#define OPENCV_HAL_IMPL_AVX512_TRANSPOSE4x4(_Tpvec, suffix, cast_from, cast_to) \ + inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, \ + const _Tpvec& a2, const _Tpvec& a3, \ + _Tpvec& b0, _Tpvec& b1, _Tpvec& b2, _Tpvec& b3) \ + { \ + __m512i t0 = cast_from(_mm512_unpacklo_##suffix(a0.val, a1.val)); \ + __m512i t1 = cast_from(_mm512_unpacklo_##suffix(a2.val, a3.val)); \ + __m512i t2 = cast_from(_mm512_unpackhi_##suffix(a0.val, a1.val)); \ + __m512i t3 = cast_from(_mm512_unpackhi_##suffix(a2.val, a3.val)); \ + b0.val = cast_to(_mm512_unpacklo_epi64(t0, t1)); \ + b1.val = cast_to(_mm512_unpackhi_epi64(t0, t1)); \ + b2.val = cast_to(_mm512_unpacklo_epi64(t2, t3)); \ + b3.val = cast_to(_mm512_unpackhi_epi64(t2, t3)); \ + } + +OPENCV_HAL_IMPL_AVX512_TRANSPOSE4x4(v_uint32x16, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_AVX512_TRANSPOSE4x4(v_int32x16, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_AVX512_TRANSPOSE4x4(v_float32x16, ps, _mm512_castps_si512, _mm512_castsi512_ps) + +//////////////// Value reordering /////////////// + +/* Expand */ +#define OPENCV_HAL_IMPL_AVX512_EXPAND(_Tpvec, _Tpwvec, _Tp, intrin) \ + inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ + { \ + b0.val = intrin(_v512_extract_low(a.val)); \ + b1.val = intrin(_v512_extract_high(a.val)); \ + } \ + inline _Tpwvec v_expand_low(const _Tpvec& a) \ + { return _Tpwvec(intrin(_v512_extract_low(a.val))); } \ + inline _Tpwvec v_expand_high(const _Tpvec& a) \ + { return _Tpwvec(intrin(_v512_extract_high(a.val))); } \ + inline _Tpwvec v512_load_expand(const _Tp* ptr) \ + { \ + __m256i a = _mm256_loadu_si256((const __m256i*)ptr); \ + return _Tpwvec(intrin(a)); \ + } + +OPENCV_HAL_IMPL_AVX512_EXPAND(v_uint8x64, v_uint16x32, uchar, _mm512_cvtepu8_epi16) +OPENCV_HAL_IMPL_AVX512_EXPAND(v_int8x64, v_int16x32, schar, _mm512_cvtepi8_epi16) +OPENCV_HAL_IMPL_AVX512_EXPAND(v_uint16x32, v_uint32x16, ushort, _mm512_cvtepu16_epi32) +OPENCV_HAL_IMPL_AVX512_EXPAND(v_int16x32, v_int32x16, short, _mm512_cvtepi16_epi32) +OPENCV_HAL_IMPL_AVX512_EXPAND(v_uint32x16, v_uint64x8, unsigned, _mm512_cvtepu32_epi64) +OPENCV_HAL_IMPL_AVX512_EXPAND(v_int32x16, v_int64x8, int, _mm512_cvtepi32_epi64) + +#define OPENCV_HAL_IMPL_AVX512_EXPAND_Q(_Tpvec, _Tp, intrin) \ + inline _Tpvec v512_load_expand_q(const _Tp* ptr) \ + { \ + __m128i a = _mm_loadu_si128((const __m128i*)ptr); \ + return _Tpvec(intrin(a)); \ + } + +OPENCV_HAL_IMPL_AVX512_EXPAND_Q(v_uint32x16, uchar, _mm512_cvtepu8_epi32) +OPENCV_HAL_IMPL_AVX512_EXPAND_Q(v_int32x16, schar, _mm512_cvtepi8_epi32) + +/* pack */ +// 16 +inline v_int8x64 v_pack(const v_int16x32& a, const v_int16x32& b) +{ return v_int8x64(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packs_epi16(a.val, b.val))); } + +inline v_uint8x64 v_pack(const v_uint16x32& a, const v_uint16x32& b) +{ + const __m512i t = _mm512_set1_epi16(255); + return v_uint8x64(_v512_combine(_mm512_cvtepi16_epi8(_mm512_min_epu16(a.val, t)), _mm512_cvtepi16_epi8(_mm512_min_epu16(b.val, t)))); +} + +inline v_uint8x64 v_pack_u(const v_int16x32& a, const v_int16x32& b) +{ + return v_uint8x64(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packus_epi16(a.val, b.val))); +} + +inline void v_pack_store(schar* ptr, const v_int16x32& a) +{ v_store_low(ptr, v_pack(a, a)); } + +inline void v_pack_store(uchar* ptr, const v_uint16x32& a) +{ + const __m512i m = _mm512_set1_epi16(255); + _mm256_storeu_si256((__m256i*)ptr, _mm512_cvtepi16_epi8(_mm512_min_epu16(a.val, m))); +} + +inline void v_pack_u_store(uchar* ptr, const v_int16x32& a) +{ v_store_low(ptr, v_pack_u(a, a)); } + +template inline +v_uint8x64 v_rshr_pack(const v_uint16x32& a, const v_uint16x32& b) +{ + // we assume that n > 0, and so the shifted 16-bit values can be treated as signed numbers. + v_uint16x32 delta = v512_setall_u16((short)(1 << (n-1))); + return v_pack_u(v_reinterpret_as_s16((a + delta) >> n), + v_reinterpret_as_s16((b + delta) >> n)); +} + +template inline +void v_rshr_pack_store(uchar* ptr, const v_uint16x32& a) +{ + v_uint16x32 delta = v512_setall_u16((short)(1 << (n-1))); + v_pack_u_store(ptr, v_reinterpret_as_s16((a + delta) >> n)); +} + +template inline +v_uint8x64 v_rshr_pack_u(const v_int16x32& a, const v_int16x32& b) +{ + v_int16x32 delta = v512_setall_s16((short)(1 << (n-1))); + return v_pack_u((a + delta) >> n, (b + delta) >> n); +} + +template inline +void v_rshr_pack_u_store(uchar* ptr, const v_int16x32& a) +{ + v_int16x32 delta = v512_setall_s16((short)(1 << (n-1))); + v_pack_u_store(ptr, (a + delta) >> n); +} + +template inline +v_int8x64 v_rshr_pack(const v_int16x32& a, const v_int16x32& b) +{ + v_int16x32 delta = v512_setall_s16((short)(1 << (n-1))); + return v_pack((a + delta) >> n, (b + delta) >> n); +} + +template inline +void v_rshr_pack_store(schar* ptr, const v_int16x32& a) +{ + v_int16x32 delta = v512_setall_s16((short)(1 << (n-1))); + v_pack_store(ptr, (a + delta) >> n); +} + +// 32 +inline v_int16x32 v_pack(const v_int32x16& a, const v_int32x16& b) +{ return v_int16x32(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packs_epi32(a.val, b.val))); } + +inline v_uint16x32 v_pack(const v_uint32x16& a, const v_uint32x16& b) +{ + const __m512i m = _mm512_set1_epi32(65535); + return v_uint16x32(_v512_combine(_mm512_cvtepi32_epi16(_mm512_min_epu32(a.val, m)), _mm512_cvtepi32_epi16(_mm512_min_epu32(b.val, m)))); +} + +inline v_uint16x32 v_pack_u(const v_int32x16& a, const v_int32x16& b) +{ return v_uint16x32(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packus_epi32(a.val, b.val))); } + +inline void v_pack_store(short* ptr, const v_int32x16& a) +{ v_store_low(ptr, v_pack(a, a)); } + +inline void v_pack_store(ushort* ptr, const v_uint32x16& a) +{ + const __m512i m = _mm512_set1_epi32(65535); + _mm256_storeu_si256((__m256i*)ptr, _mm512_cvtepi32_epi16(_mm512_min_epu32(a.val, m))); +} + +inline void v_pack_u_store(ushort* ptr, const v_int32x16& a) +{ v_store_low(ptr, v_pack_u(a, a)); } + + +template inline +v_uint16x32 v_rshr_pack(const v_uint32x16& a, const v_uint32x16& b) +{ + v_uint32x16 delta = v512_setall_u32(1 << (n-1)); + return v_pack_u(v_reinterpret_as_s32((a + delta) >> n), + v_reinterpret_as_s32((b + delta) >> n)); +} + +template inline +void v_rshr_pack_store(ushort* ptr, const v_uint32x16& a) +{ + v_uint32x16 delta = v512_setall_u32(1 << (n-1)); + v_pack_u_store(ptr, v_reinterpret_as_s32((a + delta) >> n)); +} + +template inline +v_uint16x32 v_rshr_pack_u(const v_int32x16& a, const v_int32x16& b) +{ + v_int32x16 delta = v512_setall_s32(1 << (n-1)); + return v_pack_u((a + delta) >> n, (b + delta) >> n); +} + +template inline +void v_rshr_pack_u_store(ushort* ptr, const v_int32x16& a) +{ + v_int32x16 delta = v512_setall_s32(1 << (n-1)); + v_pack_u_store(ptr, (a + delta) >> n); +} + +template inline +v_int16x32 v_rshr_pack(const v_int32x16& a, const v_int32x16& b) +{ + v_int32x16 delta = v512_setall_s32(1 << (n-1)); + return v_pack((a + delta) >> n, (b + delta) >> n); +} + +template inline +void v_rshr_pack_store(short* ptr, const v_int32x16& a) +{ + v_int32x16 delta = v512_setall_s32(1 << (n-1)); + v_pack_store(ptr, (a + delta) >> n); +} + +// 64 +// Non-saturating pack +inline v_uint32x16 v_pack(const v_uint64x8& a, const v_uint64x8& b) +{ return v_uint32x16(_v512_combine(_mm512_cvtepi64_epi32(a.val), _mm512_cvtepi64_epi32(b.val))); } + +inline v_int32x16 v_pack(const v_int64x8& a, const v_int64x8& b) +{ return v_reinterpret_as_s32(v_pack(v_reinterpret_as_u64(a), v_reinterpret_as_u64(b))); } + +inline void v_pack_store(unsigned* ptr, const v_uint64x8& a) +{ _mm256_storeu_si256((__m256i*)ptr, _mm512_cvtepi64_epi32(a.val)); } + +inline void v_pack_store(int* ptr, const v_int64x8& b) +{ v_pack_store((unsigned*)ptr, v_reinterpret_as_u64(b)); } + +template inline +v_uint32x16 v_rshr_pack(const v_uint64x8& a, const v_uint64x8& b) +{ + v_uint64x8 delta = v512_setall_u64((uint64)1 << (n-1)); + return v_pack((a + delta) >> n, (b + delta) >> n); +} + +template inline +void v_rshr_pack_store(unsigned* ptr, const v_uint64x8& a) +{ + v_uint64x8 delta = v512_setall_u64((uint64)1 << (n-1)); + v_pack_store(ptr, (a + delta) >> n); +} + +template inline +v_int32x16 v_rshr_pack(const v_int64x8& a, const v_int64x8& b) +{ + v_int64x8 delta = v512_setall_s64((int64)1 << (n-1)); + return v_pack((a + delta) >> n, (b + delta) >> n); +} + +template inline +void v_rshr_pack_store(int* ptr, const v_int64x8& a) +{ + v_int64x8 delta = v512_setall_s64((int64)1 << (n-1)); + v_pack_store(ptr, (a + delta) >> n); +} + +// pack boolean +inline v_uint8x64 v_pack_b(const v_uint16x32& a, const v_uint16x32& b) +{ return v_uint8x64(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packs_epi16(a.val, b.val))); } + +inline v_uint8x64 v_pack_b(const v_uint32x16& a, const v_uint32x16& b, + const v_uint32x16& c, const v_uint32x16& d) +{ + __m512i ab = _mm512_packs_epi32(a.val, b.val); + __m512i cd = _mm512_packs_epi32(c.val, d.val); + + return v_uint8x64(_mm512_permutexvar_epi32(_v512_set_epu32(15, 11, 7, 3, 14, 10, 6, 2, 13, 9, 5, 1, 12, 8, 4, 0), _mm512_packs_epi16(ab, cd))); +} + +inline v_uint8x64 v_pack_b(const v_uint64x8& a, const v_uint64x8& b, const v_uint64x8& c, + const v_uint64x8& d, const v_uint64x8& e, const v_uint64x8& f, + const v_uint64x8& g, const v_uint64x8& h) +{ + __m512i ab = _mm512_packs_epi32(a.val, b.val); + __m512i cd = _mm512_packs_epi32(c.val, d.val); + __m512i ef = _mm512_packs_epi32(e.val, f.val); + __m512i gh = _mm512_packs_epi32(g.val, h.val); + + __m512i abcd = _mm512_packs_epi32(ab, cd); + __m512i efgh = _mm512_packs_epi32(ef, gh); + + return v_uint8x64(_mm512_permutexvar_epi16(_v512_set_epu16(31, 23, 15, 7, 30, 22, 14, 6, 29, 21, 13, 5, 28, 20, 12, 4, + 27, 19, 11, 3, 26, 18, 10, 2, 25, 17, 9, 1, 24, 16, 8, 0), _mm512_packs_epi16(abcd, efgh))); +} + +/* Recombine */ +// its up there with load and store operations + +/* Extract */ +#define OPENCV_HAL_IMPL_AVX512_EXTRACT(_Tpvec) \ + template \ + inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \ + { return v_rotate_right(a, b); } + +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_uint8x64) +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_int8x64) +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_uint16x32) +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_int16x32) +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_uint32x16) +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_int32x16) +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_uint64x8) +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_int64x8) +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_float32x16) +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_float64x8) + +#define OPENCV_HAL_IMPL_AVX512_EXTRACT_N(_Tpvec, _Tp) \ +template inline _Tp v_extract_n(_Tpvec v) { return v_rotate_right(v).get0(); } + +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_uint8x64, uchar) +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_int8x64, schar) +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_uint16x32, ushort) +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_int16x32, short) +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_uint32x16, uint) +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_int32x16, int) +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_uint64x8, uint64) +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_int64x8, int64) +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_float32x16, float) +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_float64x8, double) + +template +inline v_uint32x16 v_broadcast_element(v_uint32x16 a) +{ + static const __m512i perm = _mm512_set1_epi32((char)i); + return v_uint32x16(_mm512_permutexvar_epi32(perm, a.val)); +} + +template +inline v_int32x16 v_broadcast_element(const v_int32x16 &a) +{ return v_reinterpret_as_s32(v_broadcast_element(v_reinterpret_as_u32(a))); } + +template +inline v_float32x16 v_broadcast_element(const v_float32x16 &a) +{ return v_reinterpret_as_f32(v_broadcast_element(v_reinterpret_as_u32(a))); } + + +///////////////////// load deinterleave ///////////////////////////// + +inline void v_load_deinterleave( const uchar* ptr, v_uint8x64& a, v_uint8x64& b ) +{ + __m512i ab0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i ab1 = _mm512_loadu_si512((const __m512i*)(ptr + 64)); +#if CV_AVX_512VBMI + __m512i mask0 = _v512_set_epu8(126, 124, 122, 120, 118, 116, 114, 112, 110, 108, 106, 104, 102, 100, 98, 96, + 94, 92, 90, 88, 86, 84, 82, 80, 78, 76, 74, 72, 70, 68, 66, 64, + 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, + 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask1 = _v512_set_epu8(127, 125, 123, 121, 119, 117, 115, 113, 111, 109, 107, 105, 103, 101, 99, 97, + 95, 93, 91, 89, 87, 85, 83, 81, 79, 77, 75, 73, 71, 69, 67, 65, + 63, 61, 59, 57, 55, 53, 51, 49, 47, 45, 43, 41, 39, 37, 35, 33, + 31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1); + a = v_uint8x64(_mm512_permutex2var_epi8(ab0, mask0, ab1)); + b = v_uint8x64(_mm512_permutex2var_epi8(ab0, mask1, ab1)); +#else + __m512i mask0 = _mm512_set4_epi32(0x0f0d0b09, 0x07050301, 0x0e0c0a08, 0x06040200); + __m512i a0b0 = _mm512_shuffle_epi8(ab0, mask0); + __m512i a1b1 = _mm512_shuffle_epi8(ab1, mask0); + __m512i mask1 = _v512_set_epu64(14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask2 = _v512_set_epu64(15, 13, 11, 9, 7, 5, 3, 1); + a = v_uint8x64(_mm512_permutex2var_epi64(a0b0, mask1, a1b1)); + b = v_uint8x64(_mm512_permutex2var_epi64(a0b0, mask2, a1b1)); +#endif +} + +inline void v_load_deinterleave( const ushort* ptr, v_uint16x32& a, v_uint16x32& b ) +{ + __m512i ab0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i ab1 = _mm512_loadu_si512((const __m512i*)(ptr + 32)); + __m512i mask0 = _v512_set_epu16(62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, + 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask1 = _v512_set_epu16(63, 61, 59, 57, 55, 53, 51, 49, 47, 45, 43, 41, 39, 37, 35, 33, + 31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1); + a = v_uint16x32(_mm512_permutex2var_epi16(ab0, mask0, ab1)); + b = v_uint16x32(_mm512_permutex2var_epi16(ab0, mask1, ab1)); +} + +inline void v_load_deinterleave( const unsigned* ptr, v_uint32x16& a, v_uint32x16& b ) +{ + __m512i ab0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i ab1 = _mm512_loadu_si512((const __m512i*)(ptr + 16)); + __m512i mask0 = _v512_set_epu32(30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask1 = _v512_set_epu32(31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1); + a = v_uint32x16(_mm512_permutex2var_epi32(ab0, mask0, ab1)); + b = v_uint32x16(_mm512_permutex2var_epi32(ab0, mask1, ab1)); +} + +inline void v_load_deinterleave( const uint64* ptr, v_uint64x8& a, v_uint64x8& b ) +{ + __m512i ab0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i ab1 = _mm512_loadu_si512((const __m512i*)(ptr + 8)); + __m512i mask0 = _v512_set_epu64(14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask1 = _v512_set_epu64(15, 13, 11, 9, 7, 5, 3, 1); + a = v_uint64x8(_mm512_permutex2var_epi64(ab0, mask0, ab1)); + b = v_uint64x8(_mm512_permutex2var_epi64(ab0, mask1, ab1)); +} + +inline void v_load_deinterleave( const uchar* ptr, v_uint8x64& a, v_uint8x64& b, v_uint8x64& c ) +{ + __m512i bgr0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i bgr1 = _mm512_loadu_si512((const __m512i*)(ptr + 64)); + __m512i bgr2 = _mm512_loadu_si512((const __m512i*)(ptr + 128)); + +#if CV_AVX_512VBMI2 + __m512i mask0 = _v512_set_epu8(126, 123, 120, 117, 114, 111, 108, 105, 102, 99, 96, 93, 90, 87, 84, 81, + 78, 75, 72, 69, 66, 63, 60, 57, 54, 51, 48, 45, 42, 39, 36, 33, + 30, 27, 24, 21, 18, 15, 12, 9, 6, 3, 0, 62, 59, 56, 53, 50, + 47, 44, 41, 38, 35, 32, 29, 26, 23, 20, 17, 14, 11, 8, 5, 2); + __m512i r0b01 = _mm512_permutex2var_epi8(bgr0, mask0, bgr1); + __m512i b1g12 = _mm512_permutex2var_epi8(bgr1, mask0, bgr2); + __m512i r12b2 = _mm512_permutex2var_epi8(bgr1, + _v512_set_epu8(125, 122, 119, 116, 113, 110, 107, 104, 101, 98, 95, 92, 89, 86, 83, 80, + 77, 74, 71, 68, 65, 127, 124, 121, 118, 115, 112, 109, 106, 103, 100, 97, + 94, 91, 88, 85, 82, 79, 76, 73, 70, 67, 64, 61, 58, 55, 52, 49, + 46, 43, 40, 37, 34, 31, 28, 25, 22, 19, 16, 13, 10, 7, 4, 1), bgr2); + a = v_uint8x64(_mm512_mask_compress_epi8(r12b2, 0xffffffffffe00000, r0b01)); + b = v_uint8x64(_mm512_mask_compress_epi8(b1g12, 0x2492492492492492, bgr0)); + c = v_uint8x64(_mm512_mask_expand_epi8(r0b01, 0xffffffffffe00000, r12b2)); +#elif CV_AVX_512VBMI + __m512i b0g0b1 = _mm512_mask_blend_epi8(0xb6db6db6db6db6db, bgr1, bgr0); + __m512i g1r1g2 = _mm512_mask_blend_epi8(0xb6db6db6db6db6db, bgr2, bgr1); + __m512i r2b2r0 = _mm512_mask_blend_epi8(0xb6db6db6db6db6db, bgr0, bgr2); + a = v_uint8x64(_mm512_permutex2var_epi8(b0g0b1, _v512_set_epu8(125, 122, 119, 116, 113, 110, 107, 104, 101, 98, 95, 92, 89, 86, 83, 80, + 77, 74, 71, 68, 65, 63, 61, 60, 58, 57, 55, 54, 52, 51, 49, 48, + 46, 45, 43, 42, 40, 39, 37, 36, 34, 33, 31, 30, 28, 27, 25, 24, + 23, 21, 20, 18, 17, 15, 14, 12, 11, 9, 8, 6, 5, 3, 2, 0), bgr2)); + b = v_uint8x64(_mm512_permutex2var_epi8(g1r1g2, _v512_set_epu8( 63, 61, 60, 58, 57, 55, 54, 52, 51, 49, 48, 46, 45, 43, 42, 40, + 39, 37, 36, 34, 33, 31, 30, 28, 27, 25, 24, 23, 21, 20, 18, 17, + 15, 14, 12, 11, 9, 8, 6, 5, 3, 2, 0, 126, 123, 120, 117, 114, + 111, 108, 105, 102, 99, 96, 93, 90, 87, 84, 81, 78, 75, 72, 69, 66), bgr0)); + c = v_uint8x64(_mm512_permutex2var_epi8(r2b2r0, _v512_set_epu8( 63, 60, 57, 54, 51, 48, 45, 42, 39, 36, 33, 30, 27, 24, 21, 18, + 15, 12, 9, 6, 3, 0, 125, 122, 119, 116, 113, 110, 107, 104, 101, 98, + 95, 92, 89, 86, 83, 80, 77, 74, 71, 68, 65, 62, 59, 56, 53, 50, + 47, 44, 41, 38, 35, 32, 29, 26, 23, 20, 17, 14, 11, 8, 5, 2), bgr1)); +#else + __m512i mask0 = _v512_set_epu16(61, 58, 55, 52, 49, 46, 43, 40, 37, 34, 63, 60, 57, 54, 51, 48, + 45, 42, 39, 36, 33, 30, 27, 24, 21, 18, 15, 12, 9, 6, 3, 0); + __m512i b01g1 = _mm512_permutex2var_epi16(bgr0, mask0, bgr1); + __m512i r12b2 = _mm512_permutex2var_epi16(bgr1, mask0, bgr2); + __m512i g20r0 = _mm512_permutex2var_epi16(bgr2, mask0, bgr0); + + __m512i b0g0 = _mm512_mask_blend_epi32(0xf800, b01g1, r12b2); + __m512i r0b1 = _mm512_permutex2var_epi16(bgr1, _v512_set_epu16(42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 29, 26, 23, 20, 17, + 14, 11, 8, 5, 2, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43), g20r0); + __m512i g1r1 = _mm512_alignr_epi32(r12b2, g20r0, 11); + a = v_uint8x64(_mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, b0g0, r0b1)); + c = v_uint8x64(_mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, r0b1, g1r1)); + b = v_uint8x64(_mm512_shuffle_epi8(_mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, g1r1, b0g0), _mm512_set4_epi32(0x0e0f0c0d, 0x0a0b0809, 0x06070405, 0x02030001))); +#endif +} + +inline void v_load_deinterleave( const ushort* ptr, v_uint16x32& a, v_uint16x32& b, v_uint16x32& c ) +{ + __m512i bgr0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i bgr1 = _mm512_loadu_si512((const __m512i*)(ptr + 32)); + __m512i bgr2 = _mm512_loadu_si512((const __m512i*)(ptr + 64)); + + __m512i mask0 = _v512_set_epu16(61, 58, 55, 52, 49, 46, 43, 40, 37, 34, 63, 60, 57, 54, 51, 48, + 45, 42, 39, 36, 33, 30, 27, 24, 21, 18, 15, 12, 9, 6, 3, 0); + __m512i b01g1 = _mm512_permutex2var_epi16(bgr0, mask0, bgr1); + __m512i r12b2 = _mm512_permutex2var_epi16(bgr1, mask0, bgr2); + __m512i g20r0 = _mm512_permutex2var_epi16(bgr2, mask0, bgr0); + + a = v_uint16x32(_mm512_mask_blend_epi32(0xf800, b01g1, r12b2)); + b = v_uint16x32(_mm512_permutex2var_epi16(bgr1, _v512_set_epu16(42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 29, 26, 23, 20, 17, + 14, 11, 8, 5, 2, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43), g20r0)); + c = v_uint16x32(_mm512_alignr_epi32(r12b2, g20r0, 11)); +} + +inline void v_load_deinterleave( const unsigned* ptr, v_uint32x16& a, v_uint32x16& b, v_uint32x16& c ) +{ + __m512i bgr0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i bgr1 = _mm512_loadu_si512((const __m512i*)(ptr + 16)); + __m512i bgr2 = _mm512_loadu_si512((const __m512i*)(ptr + 32)); + + __m512i mask0 = _v512_set_epu32(29, 26, 23, 20, 17, 30, 27, 24, 21, 18, 15, 12, 9, 6, 3, 0); + __m512i b01r1 = _mm512_permutex2var_epi32(bgr0, mask0, bgr1); + __m512i g12b2 = _mm512_permutex2var_epi32(bgr1, mask0, bgr2); + __m512i r20g0 = _mm512_permutex2var_epi32(bgr2, mask0, bgr0); + + a = v_uint32x16(_mm512_mask_blend_epi32(0xf800, b01r1, g12b2)); + b = v_uint32x16(_mm512_alignr_epi32(g12b2, r20g0, 11)); + c = v_uint32x16(_mm512_permutex2var_epi32(bgr1, _v512_set_epu32(21, 20, 19, 18, 17, 16, 13, 10, 7, 4, 1, 26, 25, 24, 23, 22), r20g0)); +} + +inline void v_load_deinterleave( const uint64* ptr, v_uint64x8& a, v_uint64x8& b, v_uint64x8& c ) +{ + __m512i bgr0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i bgr1 = _mm512_loadu_si512((const __m512i*)(ptr + 8)); + __m512i bgr2 = _mm512_loadu_si512((const __m512i*)(ptr + 16)); + + __m512i mask0 = _v512_set_epu64(13, 10, 15, 12, 9, 6, 3, 0); + __m512i b01g1 = _mm512_permutex2var_epi64(bgr0, mask0, bgr1); + __m512i r12b2 = _mm512_permutex2var_epi64(bgr1, mask0, bgr2); + __m512i g20r0 = _mm512_permutex2var_epi64(bgr2, mask0, bgr0); + + a = v_uint64x8(_mm512_mask_blend_epi64(0xc0, b01g1, r12b2)); + c = v_uint64x8(_mm512_alignr_epi64(r12b2, g20r0, 6)); + b = v_uint64x8(_mm512_permutex2var_epi64(bgr1, _v512_set_epu64(10, 9, 8, 5, 2, 13, 12, 11), g20r0)); +} + +inline void v_load_deinterleave( const uchar* ptr, v_uint8x64& a, v_uint8x64& b, v_uint8x64& c, v_uint8x64& d ) +{ + __m512i bgra0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i bgra1 = _mm512_loadu_si512((const __m512i*)(ptr + 64)); + __m512i bgra2 = _mm512_loadu_si512((const __m512i*)(ptr + 128)); + __m512i bgra3 = _mm512_loadu_si512((const __m512i*)(ptr + 192)); + +#if CV_AVX_512VBMI + __m512i mask0 = _v512_set_epu8(126, 124, 122, 120, 118, 116, 114, 112, 110, 108, 106, 104, 102, 100, 98, 96, + 94, 92, 90, 88, 86, 84, 82, 80, 78, 76, 74, 72, 70, 68, 66, 64, + 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, + 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask1 = _v512_set_epu8(127, 125, 123, 121, 119, 117, 115, 113, 111, 109, 107, 105, 103, 101, 99, 97, + 95, 93, 91, 89, 87, 85, 83, 81, 79, 77, 75, 73, 71, 69, 67, 65, + 63, 61, 59, 57, 55, 53, 51, 49, 47, 45, 43, 41, 39, 37, 35, 33, + 31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1); + + __m512i br01 = _mm512_permutex2var_epi8(bgra0, mask0, bgra1); + __m512i ga01 = _mm512_permutex2var_epi8(bgra0, mask1, bgra1); + __m512i br23 = _mm512_permutex2var_epi8(bgra2, mask0, bgra3); + __m512i ga23 = _mm512_permutex2var_epi8(bgra2, mask1, bgra3); + + a = v_uint8x64(_mm512_permutex2var_epi8(br01, mask0, br23)); + c = v_uint8x64(_mm512_permutex2var_epi8(br01, mask1, br23)); + b = v_uint8x64(_mm512_permutex2var_epi8(ga01, mask0, ga23)); + d = v_uint8x64(_mm512_permutex2var_epi8(ga01, mask1, ga23)); +#else + __m512i mask = _mm512_set4_epi32(0x0f0b0703, 0x0e0a0602, 0x0d090501, 0x0c080400); + __m512i b0g0r0a0 = _mm512_shuffle_epi8(bgra0, mask); + __m512i b1g1r1a1 = _mm512_shuffle_epi8(bgra1, mask); + __m512i b2g2r2a2 = _mm512_shuffle_epi8(bgra2, mask); + __m512i b3g3r3a3 = _mm512_shuffle_epi8(bgra3, mask); + + __m512i mask0 = _v512_set_epu32(30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask1 = _v512_set_epu32(31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1); + + __m512i br01 = _mm512_permutex2var_epi32(b0g0r0a0, mask0, b1g1r1a1); + __m512i ga01 = _mm512_permutex2var_epi32(b0g0r0a0, mask1, b1g1r1a1); + __m512i br23 = _mm512_permutex2var_epi32(b2g2r2a2, mask0, b3g3r3a3); + __m512i ga23 = _mm512_permutex2var_epi32(b2g2r2a2, mask1, b3g3r3a3); + + a = v_uint8x64(_mm512_permutex2var_epi32(br01, mask0, br23)); + c = v_uint8x64(_mm512_permutex2var_epi32(br01, mask1, br23)); + b = v_uint8x64(_mm512_permutex2var_epi32(ga01, mask0, ga23)); + d = v_uint8x64(_mm512_permutex2var_epi32(ga01, mask1, ga23)); +#endif +} + +inline void v_load_deinterleave( const ushort* ptr, v_uint16x32& a, v_uint16x32& b, v_uint16x32& c, v_uint16x32& d ) +{ + __m512i bgra0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i bgra1 = _mm512_loadu_si512((const __m512i*)(ptr + 32)); + __m512i bgra2 = _mm512_loadu_si512((const __m512i*)(ptr + 64)); + __m512i bgra3 = _mm512_loadu_si512((const __m512i*)(ptr + 96)); + + __m512i mask0 = _v512_set_epu16(62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, + 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask1 = _v512_set_epu16(63, 61, 59, 57, 55, 53, 51, 49, 47, 45, 43, 41, 39, 37, 35, 33, + 31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1); + + __m512i br01 = _mm512_permutex2var_epi16(bgra0, mask0, bgra1); + __m512i ga01 = _mm512_permutex2var_epi16(bgra0, mask1, bgra1); + __m512i br23 = _mm512_permutex2var_epi16(bgra2, mask0, bgra3); + __m512i ga23 = _mm512_permutex2var_epi16(bgra2, mask1, bgra3); + + a = v_uint16x32(_mm512_permutex2var_epi16(br01, mask0, br23)); + c = v_uint16x32(_mm512_permutex2var_epi16(br01, mask1, br23)); + b = v_uint16x32(_mm512_permutex2var_epi16(ga01, mask0, ga23)); + d = v_uint16x32(_mm512_permutex2var_epi16(ga01, mask1, ga23)); +} + +inline void v_load_deinterleave( const unsigned* ptr, v_uint32x16& a, v_uint32x16& b, v_uint32x16& c, v_uint32x16& d ) +{ + __m512i bgra0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i bgra1 = _mm512_loadu_si512((const __m512i*)(ptr + 16)); + __m512i bgra2 = _mm512_loadu_si512((const __m512i*)(ptr + 32)); + __m512i bgra3 = _mm512_loadu_si512((const __m512i*)(ptr + 48)); + + __m512i mask0 = _v512_set_epu32(30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask1 = _v512_set_epu32(31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1); + + __m512i br01 = _mm512_permutex2var_epi32(bgra0, mask0, bgra1); + __m512i ga01 = _mm512_permutex2var_epi32(bgra0, mask1, bgra1); + __m512i br23 = _mm512_permutex2var_epi32(bgra2, mask0, bgra3); + __m512i ga23 = _mm512_permutex2var_epi32(bgra2, mask1, bgra3); + + a = v_uint32x16(_mm512_permutex2var_epi32(br01, mask0, br23)); + c = v_uint32x16(_mm512_permutex2var_epi32(br01, mask1, br23)); + b = v_uint32x16(_mm512_permutex2var_epi32(ga01, mask0, ga23)); + d = v_uint32x16(_mm512_permutex2var_epi32(ga01, mask1, ga23)); +} + +inline void v_load_deinterleave( const uint64* ptr, v_uint64x8& a, v_uint64x8& b, v_uint64x8& c, v_uint64x8& d ) +{ + __m512i bgra0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i bgra1 = _mm512_loadu_si512((const __m512i*)(ptr + 8)); + __m512i bgra2 = _mm512_loadu_si512((const __m512i*)(ptr + 16)); + __m512i bgra3 = _mm512_loadu_si512((const __m512i*)(ptr + 24)); + + __m512i mask0 = _v512_set_epu64(14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask1 = _v512_set_epu64(15, 13, 11, 9, 7, 5, 3, 1); + + __m512i br01 = _mm512_permutex2var_epi64(bgra0, mask0, bgra1); + __m512i ga01 = _mm512_permutex2var_epi64(bgra0, mask1, bgra1); + __m512i br23 = _mm512_permutex2var_epi64(bgra2, mask0, bgra3); + __m512i ga23 = _mm512_permutex2var_epi64(bgra2, mask1, bgra3); + + a = v_uint64x8(_mm512_permutex2var_epi64(br01, mask0, br23)); + c = v_uint64x8(_mm512_permutex2var_epi64(br01, mask1, br23)); + b = v_uint64x8(_mm512_permutex2var_epi64(ga01, mask0, ga23)); + d = v_uint64x8(_mm512_permutex2var_epi64(ga01, mask1, ga23)); +} + +///////////////////////////// store interleave ///////////////////////////////////// + +inline void v_store_interleave( uchar* ptr, const v_uint8x64& x, const v_uint8x64& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + v_uint8x64 low, high; + v_zip(x, y, low, high); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, low.val); + _mm512_stream_si512((__m512i*)(ptr + 64), high.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, low.val); + _mm512_store_si512((__m512i*)(ptr + 64), high.val); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, low.val); + _mm512_storeu_si512((__m512i*)(ptr + 64), high.val); + } +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x32& x, const v_uint16x32& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + v_uint16x32 low, high; + v_zip(x, y, low, high); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, low.val); + _mm512_stream_si512((__m512i*)(ptr + 32), high.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, low.val); + _mm512_store_si512((__m512i*)(ptr + 32), high.val); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, low.val); + _mm512_storeu_si512((__m512i*)(ptr + 32), high.val); + } +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x16& x, const v_uint32x16& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + v_uint32x16 low, high; + v_zip(x, y, low, high); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, low.val); + _mm512_stream_si512((__m512i*)(ptr + 16), high.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, low.val); + _mm512_store_si512((__m512i*)(ptr + 16), high.val); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, low.val); + _mm512_storeu_si512((__m512i*)(ptr + 16), high.val); + } +} + +inline void v_store_interleave( uint64* ptr, const v_uint64x8& x, const v_uint64x8& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + v_uint64x8 low, high; + v_zip(x, y, low, high); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, low.val); + _mm512_stream_si512((__m512i*)(ptr + 8), high.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, low.val); + _mm512_store_si512((__m512i*)(ptr + 8), high.val); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, low.val); + _mm512_storeu_si512((__m512i*)(ptr + 8), high.val); + } +} + +inline void v_store_interleave( uchar* ptr, const v_uint8x64& a, const v_uint8x64& b, const v_uint8x64& c, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ +#if CV_AVX_512VBMI + __m512i mask0 = _v512_set_epu8(127, 84, 20, 126, 83, 19, 125, 82, 18, 124, 81, 17, 123, 80, 16, 122, + 79, 15, 121, 78, 14, 120, 77, 13, 119, 76, 12, 118, 75, 11, 117, 74, + 10, 116, 73, 9, 115, 72, 8, 114, 71, 7, 113, 70, 6, 112, 69, 5, + 111, 68, 4, 110, 67, 3, 109, 66, 2, 108, 65, 1, 107, 64, 0, 106); + __m512i mask1 = _v512_set_epu8( 21, 42, 105, 20, 41, 104, 19, 40, 103, 18, 39, 102, 17, 38, 101, 16, + 37, 100, 15, 36, 99, 14, 35, 98, 13, 34, 97, 12, 33, 96, 11, 32, + 95, 10, 31, 94, 9, 30, 93, 8, 29, 92, 7, 28, 91, 6, 27, 90, + 5, 26, 89, 4, 25, 88, 3, 24, 87, 2, 23, 86, 1, 22, 85, 0); + __m512i mask2 = _v512_set_epu8(106, 127, 63, 105, 126, 62, 104, 125, 61, 103, 124, 60, 102, 123, 59, 101, + 122, 58, 100, 121, 57, 99, 120, 56, 98, 119, 55, 97, 118, 54, 96, 117, + 53, 95, 116, 52, 94, 115, 51, 93, 114, 50, 92, 113, 49, 91, 112, 48, + 90, 111, 47, 89, 110, 46, 88, 109, 45, 87, 108, 44, 86, 107, 43, 85); + __m512i r2g0r0 = _mm512_permutex2var_epi8(b.val, mask0, c.val); + __m512i b0r1b1 = _mm512_permutex2var_epi8(a.val, mask1, c.val); + __m512i g1b2g2 = _mm512_permutex2var_epi8(a.val, mask2, b.val); + + __m512i bgr0 = _mm512_mask_blend_epi8(0x9249249249249249, r2g0r0, b0r1b1); + __m512i bgr1 = _mm512_mask_blend_epi8(0x9249249249249249, b0r1b1, g1b2g2); + __m512i bgr2 = _mm512_mask_blend_epi8(0x9249249249249249, g1b2g2, r2g0r0); +#else + __m512i g1g0 = _mm512_shuffle_epi8(b.val, _mm512_set4_epi32(0x0e0f0c0d, 0x0a0b0809, 0x06070405, 0x02030001)); + __m512i b0g0 = _mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, a.val, g1g0); + __m512i r0b1 = _mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, c.val, a.val); + __m512i g1r1 = _mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, g1g0, c.val); + + __m512i mask0 = _v512_set_epu16(42, 10, 31, 41, 9, 30, 40, 8, 29, 39, 7, 28, 38, 6, 27, 37, + 5, 26, 36, 4, 25, 35, 3, 24, 34, 2, 23, 33, 1, 22, 32, 0); + __m512i mask1 = _v512_set_epu16(21, 52, 41, 20, 51, 40, 19, 50, 39, 18, 49, 38, 17, 48, 37, 16, + 47, 36, 15, 46, 35, 14, 45, 34, 13, 44, 33, 12, 43, 32, 11, 42); + __m512i mask2 = _v512_set_epu16(63, 31, 20, 62, 30, 19, 61, 29, 18, 60, 28, 17, 59, 27, 16, 58, + 26, 15, 57, 25, 14, 56, 24, 13, 55, 23, 12, 54, 22, 11, 53, 21); + __m512i b0g0b2 = _mm512_permutex2var_epi16(b0g0, mask0, r0b1); + __m512i r1b1r0 = _mm512_permutex2var_epi16(b0g0, mask1, g1r1); + __m512i g2r2g1 = _mm512_permutex2var_epi16(r0b1, mask2, g1r1); + + __m512i bgr0 = _mm512_mask_blend_epi16(0x24924924, b0g0b2, r1b1r0); + __m512i bgr1 = _mm512_mask_blend_epi16(0x24924924, r1b1r0, g2r2g1); + __m512i bgr2 = _mm512_mask_blend_epi16(0x24924924, g2r2g1, b0g0b2); +#endif + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, bgr0); + _mm512_stream_si512((__m512i*)(ptr + 64), bgr1); + _mm512_stream_si512((__m512i*)(ptr + 128), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, bgr0); + _mm512_store_si512((__m512i*)(ptr + 64), bgr1); + _mm512_store_si512((__m512i*)(ptr + 128), bgr2); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, bgr0); + _mm512_storeu_si512((__m512i*)(ptr + 64), bgr1); + _mm512_storeu_si512((__m512i*)(ptr + 128), bgr2); + } +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x32& a, const v_uint16x32& b, const v_uint16x32& c, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m512i mask0 = _v512_set_epu16(42, 10, 31, 41, 9, 30, 40, 8, 29, 39, 7, 28, 38, 6, 27, 37, + 5, 26, 36, 4, 25, 35, 3, 24, 34, 2, 23, 33, 1, 22, 32, 0); + __m512i mask1 = _v512_set_epu16(21, 52, 41, 20, 51, 40, 19, 50, 39, 18, 49, 38, 17, 48, 37, 16, + 47, 36, 15, 46, 35, 14, 45, 34, 13, 44, 33, 12, 43, 32, 11, 42); + __m512i mask2 = _v512_set_epu16(63, 31, 20, 62, 30, 19, 61, 29, 18, 60, 28, 17, 59, 27, 16, 58, + 26, 15, 57, 25, 14, 56, 24, 13, 55, 23, 12, 54, 22, 11, 53, 21); + __m512i b0g0b2 = _mm512_permutex2var_epi16(a.val, mask0, b.val); + __m512i r1b1r0 = _mm512_permutex2var_epi16(a.val, mask1, c.val); + __m512i g2r2g1 = _mm512_permutex2var_epi16(b.val, mask2, c.val); + + __m512i bgr0 = _mm512_mask_blend_epi16(0x24924924, b0g0b2, r1b1r0); + __m512i bgr1 = _mm512_mask_blend_epi16(0x24924924, r1b1r0, g2r2g1); + __m512i bgr2 = _mm512_mask_blend_epi16(0x24924924, g2r2g1, b0g0b2); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, bgr0); + _mm512_stream_si512((__m512i*)(ptr + 32), bgr1); + _mm512_stream_si512((__m512i*)(ptr + 64), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, bgr0); + _mm512_store_si512((__m512i*)(ptr + 32), bgr1); + _mm512_store_si512((__m512i*)(ptr + 64), bgr2); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, bgr0); + _mm512_storeu_si512((__m512i*)(ptr + 32), bgr1); + _mm512_storeu_si512((__m512i*)(ptr + 64), bgr2); + } +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x16& a, const v_uint32x16& b, const v_uint32x16& c, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m512i mask0 = _v512_set_epu32(26, 31, 15, 25, 30, 14, 24, 29, 13, 23, 28, 12, 22, 27, 11, 21); + __m512i mask1 = _v512_set_epu32(31, 10, 25, 30, 9, 24, 29, 8, 23, 28, 7, 22, 27, 6, 21, 26); + __m512i g1b2g2 = _mm512_permutex2var_epi32(a.val, mask0, b.val); + __m512i r2r1b1 = _mm512_permutex2var_epi32(a.val, mask1, c.val); + + __m512i bgr0 = _mm512_mask_expand_epi32(_mm512_mask_expand_epi32(_mm512_maskz_expand_epi32(0x9249, a.val), 0x2492, b.val), 0x4924, c.val); + __m512i bgr1 = _mm512_mask_blend_epi32(0x9249, r2r1b1, g1b2g2); + __m512i bgr2 = _mm512_mask_blend_epi32(0x9249, g1b2g2, r2r1b1); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, bgr0); + _mm512_stream_si512((__m512i*)(ptr + 16), bgr1); + _mm512_stream_si512((__m512i*)(ptr + 32), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, bgr0); + _mm512_store_si512((__m512i*)(ptr + 16), bgr1); + _mm512_store_si512((__m512i*)(ptr + 32), bgr2); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, bgr0); + _mm512_storeu_si512((__m512i*)(ptr + 16), bgr1); + _mm512_storeu_si512((__m512i*)(ptr + 32), bgr2); + } +} + +inline void v_store_interleave( uint64* ptr, const v_uint64x8& a, const v_uint64x8& b, const v_uint64x8& c, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m512i mask0 = _v512_set_epu64( 5, 12, 7, 4, 11, 6, 3, 10); + __m512i mask1 = _v512_set_epu64(15, 7, 4, 14, 6, 3, 13, 5); + __m512i r1b1b2 = _mm512_permutex2var_epi64(a.val, mask0, c.val); + __m512i g2r2g1 = _mm512_permutex2var_epi64(b.val, mask1, c.val); + + __m512i bgr0 = _mm512_mask_expand_epi64(_mm512_mask_expand_epi64(_mm512_maskz_expand_epi64(0x49, a.val), 0x92, b.val), 0x24, c.val); + __m512i bgr1 = _mm512_mask_blend_epi64(0xdb, g2r2g1, r1b1b2); + __m512i bgr2 = _mm512_mask_blend_epi64(0xdb, r1b1b2, g2r2g1); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, bgr0); + _mm512_stream_si512((__m512i*)(ptr + 8), bgr1); + _mm512_stream_si512((__m512i*)(ptr + 16), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, bgr0); + _mm512_store_si512((__m512i*)(ptr + 8), bgr1); + _mm512_store_si512((__m512i*)(ptr + 16), bgr2); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, bgr0); + _mm512_storeu_si512((__m512i*)(ptr + 8), bgr1); + _mm512_storeu_si512((__m512i*)(ptr + 16), bgr2); + } +} + +inline void v_store_interleave( uchar* ptr, const v_uint8x64& a, const v_uint8x64& b, + const v_uint8x64& c, const v_uint8x64& d, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + v_uint8x64 br01, br23, ga01, ga23; + v_zip(a, c, br01, br23); + v_zip(b, d, ga01, ga23); + v_uint8x64 bgra0, bgra1, bgra2, bgra3; + v_zip(br01, ga01, bgra0, bgra1); + v_zip(br23, ga23, bgra2, bgra3); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, bgra0.val); + _mm512_stream_si512((__m512i*)(ptr + 64), bgra1.val); + _mm512_stream_si512((__m512i*)(ptr + 128), bgra2.val); + _mm512_stream_si512((__m512i*)(ptr + 192), bgra3.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, bgra0.val); + _mm512_store_si512((__m512i*)(ptr + 64), bgra1.val); + _mm512_store_si512((__m512i*)(ptr + 128), bgra2.val); + _mm512_store_si512((__m512i*)(ptr + 192), bgra3.val); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, bgra0.val); + _mm512_storeu_si512((__m512i*)(ptr + 64), bgra1.val); + _mm512_storeu_si512((__m512i*)(ptr + 128), bgra2.val); + _mm512_storeu_si512((__m512i*)(ptr + 192), bgra3.val); + } +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x32& a, const v_uint16x32& b, + const v_uint16x32& c, const v_uint16x32& d, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + v_uint16x32 br01, br23, ga01, ga23; + v_zip(a, c, br01, br23); + v_zip(b, d, ga01, ga23); + v_uint16x32 bgra0, bgra1, bgra2, bgra3; + v_zip(br01, ga01, bgra0, bgra1); + v_zip(br23, ga23, bgra2, bgra3); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, bgra0.val); + _mm512_stream_si512((__m512i*)(ptr + 32), bgra1.val); + _mm512_stream_si512((__m512i*)(ptr + 64), bgra2.val); + _mm512_stream_si512((__m512i*)(ptr + 96), bgra3.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, bgra0.val); + _mm512_store_si512((__m512i*)(ptr + 32), bgra1.val); + _mm512_store_si512((__m512i*)(ptr + 64), bgra2.val); + _mm512_store_si512((__m512i*)(ptr + 96), bgra3.val); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, bgra0.val); + _mm512_storeu_si512((__m512i*)(ptr + 32), bgra1.val); + _mm512_storeu_si512((__m512i*)(ptr + 64), bgra2.val); + _mm512_storeu_si512((__m512i*)(ptr + 96), bgra3.val); + } +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x16& a, const v_uint32x16& b, + const v_uint32x16& c, const v_uint32x16& d, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + v_uint32x16 br01, br23, ga01, ga23; + v_zip(a, c, br01, br23); + v_zip(b, d, ga01, ga23); + v_uint32x16 bgra0, bgra1, bgra2, bgra3; + v_zip(br01, ga01, bgra0, bgra1); + v_zip(br23, ga23, bgra2, bgra3); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, bgra0.val); + _mm512_stream_si512((__m512i*)(ptr + 16), bgra1.val); + _mm512_stream_si512((__m512i*)(ptr + 32), bgra2.val); + _mm512_stream_si512((__m512i*)(ptr + 48), bgra3.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, bgra0.val); + _mm512_store_si512((__m512i*)(ptr + 16), bgra1.val); + _mm512_store_si512((__m512i*)(ptr + 32), bgra2.val); + _mm512_store_si512((__m512i*)(ptr + 48), bgra3.val); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, bgra0.val); + _mm512_storeu_si512((__m512i*)(ptr + 16), bgra1.val); + _mm512_storeu_si512((__m512i*)(ptr + 32), bgra2.val); + _mm512_storeu_si512((__m512i*)(ptr + 48), bgra3.val); + } +} + +inline void v_store_interleave( uint64* ptr, const v_uint64x8& a, const v_uint64x8& b, + const v_uint64x8& c, const v_uint64x8& d, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + v_uint64x8 br01, br23, ga01, ga23; + v_zip(a, c, br01, br23); + v_zip(b, d, ga01, ga23); + v_uint64x8 bgra0, bgra1, bgra2, bgra3; + v_zip(br01, ga01, bgra0, bgra1); + v_zip(br23, ga23, bgra2, bgra3); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, bgra0.val); + _mm512_stream_si512((__m512i*)(ptr + 8), bgra1.val); + _mm512_stream_si512((__m512i*)(ptr + 16), bgra2.val); + _mm512_stream_si512((__m512i*)(ptr + 24), bgra3.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, bgra0.val); + _mm512_store_si512((__m512i*)(ptr + 8), bgra1.val); + _mm512_store_si512((__m512i*)(ptr + 16), bgra2.val); + _mm512_store_si512((__m512i*)(ptr + 24), bgra3.val); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, bgra0.val); + _mm512_storeu_si512((__m512i*)(ptr + 8), bgra1.val); + _mm512_storeu_si512((__m512i*)(ptr + 16), bgra2.val); + _mm512_storeu_si512((__m512i*)(ptr + 24), bgra3.val); + } +} + +#define OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1) \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0 ) \ +{ \ + _Tpvec1 a1, b1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ +} \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0 ) \ +{ \ + _Tpvec1 a1, b1, c1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ + c0 = v_reinterpret_as_##suffix0(c1); \ +} \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0, _Tpvec0& d0 ) \ +{ \ + _Tpvec1 a1, b1, c1, d1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1, d1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ + c0 = v_reinterpret_as_##suffix0(c1); \ + d0 = v_reinterpret_as_##suffix0(d1); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + hal::StoreMode mode=hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, mode); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, const _Tpvec0& c0, \ + hal::StoreMode mode=hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, mode); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + const _Tpvec0& c0, const _Tpvec0& d0, \ + hal::StoreMode mode=hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ + _Tpvec1 d1 = v_reinterpret_as_##suffix1(d0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1, mode); \ +} + +OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(v_int8x64, schar, s8, v_uint8x64, uchar, u8) +OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(v_int16x32, short, s16, v_uint16x32, ushort, u16) +OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(v_int32x16, int, s32, v_uint32x16, unsigned, u32) +OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(v_float32x16, float, f32, v_uint32x16, unsigned, u32) +OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(v_int64x8, int64, s64, v_uint64x8, uint64, u64) +OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(v_float64x8, double, f64, v_uint64x8, uint64, u64) + +////////// Mask and checks ///////// + +/** Mask **/ +inline int64 v_signmask(const v_int8x64& a) { return (int64)_mm512_movepi8_mask(a.val); } +inline int v_signmask(const v_int16x32& a) { return (int)_mm512_cmp_epi16_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_LT); } +inline int v_signmask(const v_int32x16& a) { return (int)_mm512_cmp_epi32_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_LT); } +inline int v_signmask(const v_int64x8& a) { return (int)_mm512_cmp_epi64_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_LT); } + +inline int64 v_signmask(const v_uint8x64& a) { return v_signmask(v_reinterpret_as_s8(a)); } +inline int v_signmask(const v_uint16x32& a) { return v_signmask(v_reinterpret_as_s16(a)); } +inline int v_signmask(const v_uint32x16& a) { return v_signmask(v_reinterpret_as_s32(a)); } +inline int v_signmask(const v_uint64x8& a) { return v_signmask(v_reinterpret_as_s64(a)); } +inline int v_signmask(const v_float32x16& a) { return v_signmask(v_reinterpret_as_s32(a)); } +inline int v_signmask(const v_float64x8& a) { return v_signmask(v_reinterpret_as_s64(a)); } + +/** Checks **/ +inline bool v_check_all(const v_int8x64& a) { return !(bool)_mm512_cmp_epi8_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_NLT); } +inline bool v_check_any(const v_int8x64& a) { return (bool)_mm512_movepi8_mask(a.val); } +inline bool v_check_all(const v_int16x32& a) { return !(bool)_mm512_cmp_epi16_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_NLT); } +inline bool v_check_any(const v_int16x32& a) { return (bool)_mm512_cmp_epi16_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_LT); } +inline bool v_check_all(const v_int32x16& a) { return !(bool)_mm512_cmp_epi32_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_NLT); } +inline bool v_check_any(const v_int32x16& a) { return (bool)_mm512_cmp_epi32_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_LT); } +inline bool v_check_all(const v_int64x8& a) { return !(bool)_mm512_cmp_epi64_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_NLT); } +inline bool v_check_any(const v_int64x8& a) { return (bool)_mm512_cmp_epi64_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_LT); } + +inline bool v_check_all(const v_float32x16& a) { return v_check_all(v_reinterpret_as_s32(a)); } +inline bool v_check_any(const v_float32x16& a) { return v_check_any(v_reinterpret_as_s32(a)); } +inline bool v_check_all(const v_float64x8& a) { return v_check_all(v_reinterpret_as_s64(a)); } +inline bool v_check_any(const v_float64x8& a) { return v_check_any(v_reinterpret_as_s64(a)); } +inline bool v_check_all(const v_uint8x64& a) { return v_check_all(v_reinterpret_as_s8(a)); } +inline bool v_check_all(const v_uint16x32& a) { return v_check_all(v_reinterpret_as_s16(a)); } +inline bool v_check_all(const v_uint32x16& a) { return v_check_all(v_reinterpret_as_s32(a)); } +inline bool v_check_all(const v_uint64x8& a) { return v_check_all(v_reinterpret_as_s64(a)); } +inline bool v_check_any(const v_uint8x64& a) { return v_check_any(v_reinterpret_as_s8(a)); } +inline bool v_check_any(const v_uint16x32& a) { return v_check_any(v_reinterpret_as_s16(a)); } +inline bool v_check_any(const v_uint32x16& a) { return v_check_any(v_reinterpret_as_s32(a)); } +inline bool v_check_any(const v_uint64x8& a) { return v_check_any(v_reinterpret_as_s64(a)); } + +inline int v_scan_forward(const v_int8x64& a) +{ + int64 mask = _mm512_movepi8_mask(a.val); + int mask32 = (int)mask; + return mask != 0 ? mask32 != 0 ? trailingZeros32(mask32) : 32 + trailingZeros32((int)(mask >> 32)) : 0; +} +inline int v_scan_forward(const v_uint8x64& a) { return v_scan_forward(v_reinterpret_as_s8(a)); } +inline int v_scan_forward(const v_int16x32& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))); } +inline int v_scan_forward(const v_uint16x32& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))); } +inline int v_scan_forward(const v_int32x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))) / 2; } +inline int v_scan_forward(const v_uint32x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))) / 2; } +inline int v_scan_forward(const v_float32x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))) / 2; } +inline int v_scan_forward(const v_int64x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))) / 4; } +inline int v_scan_forward(const v_uint64x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))) / 4; } +inline int v_scan_forward(const v_float64x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))) / 4; } + +inline void v512_cleanup() { _mm256_zeroall(); } + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END + +//! @endcond + +} // cv:: + +#endif // OPENCV_HAL_INTRIN_AVX_HPP diff --git a/IPL/include/opencv/opencv2/core/hal/intrin_cpp.hpp b/IPL/include/opencv/opencv2/core/hal/intrin_cpp.hpp index 3929e0d..859bfd7 100644 --- a/IPL/include/opencv/opencv2/core/hal/intrin_cpp.hpp +++ b/IPL/include/opencv/opencv2/core/hal/intrin_cpp.hpp @@ -42,17 +42,29 @@ // //M*/ -#ifndef __OPENCV_HAL_INTRIN_CPP_HPP__ -#define __OPENCV_HAL_INTRIN_CPP_HPP__ +#ifndef OPENCV_HAL_INTRIN_CPP_HPP +#define OPENCV_HAL_INTRIN_CPP_HPP #include #include #include #include "opencv2/core/saturate.hpp" +//! @cond IGNORED +#define CV_SIMD128_CPP 1 +#if defined(CV_FORCE_SIMD128_CPP) || defined(CV_DOXYGEN) +#define CV_SIMD128 1 +#define CV_SIMD128_64F 1 +#endif +//! @endcond + namespace cv { +#ifndef CV_DOXYGEN +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN +#endif + /** @addtogroup core_hal_intrin "Universal intrinsics" is a types and functions set intended to simplify vectorization of code on @@ -69,10 +81,10 @@ implemented as a structure based on a one SIMD register. - cv::v_uint8x16 and cv::v_int8x16: sixteen 8-bit integer values (unsigned/signed) - char - cv::v_uint16x8 and cv::v_int16x8: eight 16-bit integer values (unsigned/signed) - short -- cv::v_uint32x4 and cv::v_int32x4: four 32-bit integer values (unsgined/signed) - int +- cv::v_uint32x4 and cv::v_int32x4: four 32-bit integer values (unsigned/signed) - int - cv::v_uint64x2 and cv::v_int64x2: two 64-bit integer values (unsigned/signed) - int64 - cv::v_float32x4: four 32-bit floating point values (signed) - float -- cv::v_float64x2: two 64-bit floating point valies (signed) - double +- cv::v_float64x2: two 64-bit floating point values (signed) - double @note cv::v_float64x2 is not implemented in NEON variant, if you want to use this type, don't forget to @@ -95,7 +107,7 @@ block and to save contents of the register to memory block. @ref v_setall_s8, @ref v_setall_u8, ..., @ref v_setzero_u8, @ref v_setzero_s8, ... - Memory operations: -@ref v_load, @ref v_load_aligned, @ref v_load_halves, +@ref v_load, @ref v_load_aligned, @ref v_load_low, @ref v_load_halves, @ref v_store, @ref v_store_aligned, @ref v_store_high, @ref v_store_low @@ -103,11 +115,12 @@ block and to save contents of the register to memory block. These operations allow to reorder or recombine elements in one or multiple vectors. -- Interleave, deinterleave (3 and 4 channels): @ref v_load_deinterleave, @ref v_store_interleave -- Expand: @ref v_load_expand, @ref v_load_expand_q, @ref v_expand -- Pack: @ref v_pack, @ref v_pack_u, @ref v_rshr_pack, @ref v_rshr_pack_u, +- Interleave, deinterleave (2, 3 and 4 channels): @ref v_load_deinterleave, @ref v_store_interleave +- Expand: @ref v_load_expand, @ref v_load_expand_q, @ref v_expand, @ref v_expand_low, @ref v_expand_high +- Pack: @ref v_pack, @ref v_pack_u, @ref v_pack_b, @ref v_rshr_pack, @ref v_rshr_pack_u, @ref v_pack_store, @ref v_pack_u_store, @ref v_rshr_pack_store, @ref v_rshr_pack_u_store - Recombine: @ref v_zip, @ref v_recombine, @ref v_combine_low, @ref v_combine_high +- Reverse: @ref v_reverse - Extract: @ref v_extract @@ -116,32 +129,32 @@ These operations allow to reorder or recombine elements in one or multiple vecto Element-wise binary and unary operations. - Arithmetics: -@ref operator+(const v_reg &a, const v_reg &b) "+", -@ref operator-(const v_reg &a, const v_reg &b) "-", -@ref operator*(const v_reg &a, const v_reg &b) "*", -@ref operator/(const v_reg &a, const v_reg &b) "/", +@ref operator +(const v_reg &a, const v_reg &b) "+", +@ref operator -(const v_reg &a, const v_reg &b) "-", +@ref operator *(const v_reg &a, const v_reg &b) "*", +@ref operator /(const v_reg &a, const v_reg &b) "/", @ref v_mul_expand - Non-saturating arithmetics: @ref v_add_wrap, @ref v_sub_wrap - Bitwise shifts: -@ref operator<<(const v_reg &a, int s) "<<", -@ref operator>>(const v_reg &a, int s) ">>", +@ref operator <<(const v_reg &a, int s) "<<", +@ref operator >>(const v_reg &a, int s) ">>", @ref v_shl, @ref v_shr - Bitwise logic: -@ref operator&(const v_reg &a, const v_reg &b) "&", -@ref operator|(const v_reg &a, const v_reg &b) "|", -@ref operator^(const v_reg &a, const v_reg &b) "^", -@ref operator~(const v_reg &a) "~" +@ref operator &(const v_reg &a, const v_reg &b) "&", +@ref operator |(const v_reg &a, const v_reg &b) "|", +@ref operator ^(const v_reg &a, const v_reg &b) "^", +@ref operator ~(const v_reg &a) "~" - Comparison: -@ref operator>(const v_reg &a, const v_reg &b) ">", -@ref operator>=(const v_reg &a, const v_reg &b) ">=", -@ref operator<(const v_reg &a, const v_reg &b) "<", -@ref operator<=(const v_reg &a, const v_reg &b) "<=", +@ref operator >(const v_reg &a, const v_reg &b) ">", +@ref operator >=(const v_reg &a, const v_reg &b) ">=", +@ref operator <(const v_reg &a, const v_reg &b) "<", +@ref operator <=(const v_reg &a, const v_reg &b) "<=", @ref operator==(const v_reg &a, const v_reg &b) "==", -@ref operator!=(const v_reg &a, const v_reg &b) "!=" +@ref operator !=(const v_reg &a, const v_reg &b) "!=" - min/max: @ref v_min, @ref v_max @@ -149,13 +162,13 @@ Element-wise binary and unary operations. Most of these operations return only one value. -- Reduce: @ref v_reduce_min, @ref v_reduce_max, @ref v_reduce_sum +- Reduce: @ref v_reduce_min, @ref v_reduce_max, @ref v_reduce_sum, @ref v_popcount - Mask: @ref v_signmask, @ref v_check_all, @ref v_check_any, @ref v_select ### Other math - Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref v_sqr_magnitude -- Absolute values: @ref v_abs, @ref v_absdiff +- Absolute values: @ref v_abs, @ref v_absdiff, @ref v_absdiffs ### Conversions @@ -167,7 +180,8 @@ Different type conversions and casts: ### Matrix operations -In these operations vectors represent matrix rows/columns: @ref v_dotprod, @ref v_matmul, @ref v_transpose4x4 +In these operations vectors represent matrix rows/columns: @ref v_dotprod, @ref v_dotprod_fast, +@ref v_dotprod_expand, @ref v_dotprod_expand_fast, @ref v_matmul, @ref v_transpose4x4 ### Usability @@ -181,26 +195,38 @@ Regular integers: |load, store | x | x | x | x | x | x | |interleave | x | x | x | x | x | x | |expand | x | x | x | x | x | x | +|expand_low | x | x | x | x | x | x | +|expand_high | x | x | x | x | x | x | |expand_q | x | x | | | | | |add, sub | x | x | x | x | x | x | |add_wrap, sub_wrap | x | x | x | x | | | -|mul | | | x | x | x | x | -|mul_expand | | | x | x | x | | +|mul_wrap | x | x | x | x | | | +|mul | x | x | x | x | x | x | +|mul_expand | x | x | x | x | x | | |compare | x | x | x | x | x | x | |shift | | | x | x | x | x | -|dotprod | | | | x | | | +|dotprod | | | | x | | x | +|dotprod_fast | | | | x | | x | +|dotprod_expand | x | x | x | x | | x | +|dotprod_expand_fast| x | x | x | x | | x | |logical | x | x | x | x | x | x | |min, max | x | x | x | x | x | x | |absdiff | x | x | x | x | x | x | -|reduce | | | | | x | x | +|absdiffs | | x | | x | | | +|reduce | x | x | x | x | x | x | |mask | x | x | x | x | x | x | |pack | x | x | x | x | x | x | |pack_u | x | | x | | | | +|pack_b | x | | | | | | |unpack | x | x | x | x | x | x | |extract | x | x | x | x | x | x | +|rotate (lanes) | x | x | x | x | x | x | |cvt_flt32 | | | | | | x | |cvt_flt64 | | | | | | x | |transpose4x4 | | | | | x | x | +|reverse | x | x | x | x | x | x | +|extract_n | x | x | x | x | x | x | +|broadcast_element | | | | | x | x | Big integers: @@ -210,7 +236,11 @@ Big integers: |add, sub | x | x | |shift | x | x | |logical | x | x | +|reverse | x | x | |extract | x | x | +|rotate (lanes) | x | x | +|cvt_flt64 | | x | +|extract_n | x | x | Floating point: @@ -232,7 +262,11 @@ Floating point: |sqrt, abs | x | x | |float math | x | x | |transpose4x4 | x | | - +|extract | x | x | +|rotate (lanes) | x | x | +|reverse | x | x | +|extract_n | x | x | +|broadcast_element | x | | @{ */ @@ -240,8 +274,6 @@ template struct v_reg { //! @cond IGNORED typedef _Tp lane_type; - typedef v_reg::int_type, n> int_vec; - typedef v_reg::abs_type, n> abs_vec; enum { nlanes = n }; // !@endcond @@ -346,6 +378,13 @@ template struct v_reg return c; } + v_reg& operator=(const v_reg<_Tp, n> & r) + { + for( int i = 0; i < n; i++ ) + s[i] = r.s[i]; + return *this; + } + _Tp s[n]; //! @endcond }; @@ -371,50 +410,102 @@ typedef v_reg v_uint64x2; /** @brief Two 64-bit signed integer values */ typedef v_reg v_int64x2; -//! @brief Helper macro -//! @ingroup core_hal_intrin_impl -#define OPENCV_HAL_IMPL_BIN_OP(bin_op) \ -template inline v_reg<_Tp, n> \ - operator bin_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ -{ \ - v_reg<_Tp, n> c; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \ - return c; \ -} \ -template inline v_reg<_Tp, n>& \ - operator bin_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ -{ \ - for( int i = 0; i < n; i++ ) \ - a.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \ - return a; \ -} - /** @brief Add values For all types. */ -OPENCV_HAL_IMPL_BIN_OP(+) +template CV_INLINE v_reg<_Tp, n> operator+(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); +template CV_INLINE v_reg<_Tp, n>& operator+=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); /** @brief Subtract values For all types. */ -OPENCV_HAL_IMPL_BIN_OP(-) +template CV_INLINE v_reg<_Tp, n> operator-(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); +template CV_INLINE v_reg<_Tp, n>& operator-=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); /** @brief Multiply values For 16- and 32-bit integer types and floating types. */ -OPENCV_HAL_IMPL_BIN_OP(*) +template CV_INLINE v_reg<_Tp, n> operator*(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); +template CV_INLINE v_reg<_Tp, n>& operator*=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); /** @brief Divide values For floating types only. */ -OPENCV_HAL_IMPL_BIN_OP(/) +template CV_INLINE v_reg<_Tp, n> operator/(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); +template CV_INLINE v_reg<_Tp, n>& operator/=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); -//! @brief Helper macro -//! @ingroup core_hal_intrin_impl -#define OPENCV_HAL_IMPL_BIT_OP(bit_op) \ -template inline v_reg<_Tp, n> operator bit_op \ - (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ + +/** @brief Bitwise AND + +Only for integer types. */ +template CV_INLINE v_reg<_Tp, n> operator&(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); +template CV_INLINE v_reg<_Tp, n>& operator&=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); + +/** @brief Bitwise OR + +Only for integer types. */ +template CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); +template CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); + +/** @brief Bitwise XOR + +Only for integer types.*/ +template CV_INLINE v_reg<_Tp, n> operator^(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); +template CV_INLINE v_reg<_Tp, n>& operator^=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); + +/** @brief Bitwise NOT + +Only for integer types.*/ +template CV_INLINE v_reg<_Tp, n> operator~(const v_reg<_Tp, n>& a); + + +#ifndef CV_DOXYGEN + +#define CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, ...) \ +__CV_EXPAND(macro_name(uchar, __VA_ARGS__)) \ +__CV_EXPAND(macro_name(schar, __VA_ARGS__)) \ +__CV_EXPAND(macro_name(ushort, __VA_ARGS__)) \ +__CV_EXPAND(macro_name(short, __VA_ARGS__)) \ +__CV_EXPAND(macro_name(unsigned, __VA_ARGS__)) \ +__CV_EXPAND(macro_name(int, __VA_ARGS__)) \ +__CV_EXPAND(macro_name(uint64, __VA_ARGS__)) \ +__CV_EXPAND(macro_name(int64, __VA_ARGS__)) \ + +#define CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, ...) \ +__CV_EXPAND(macro_name(float, __VA_ARGS__)) \ +__CV_EXPAND(macro_name(double, __VA_ARGS__)) \ + +#define CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(macro_name, ...) \ +CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, __VA_ARGS__) \ +CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, __VA_ARGS__) \ + +#define CV__HAL_INTRIN_IMPL_BIN_OP_(_Tp, bin_op) \ +template inline \ +v_reg<_Tp, n> operator bin_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + v_reg<_Tp, n> c; \ + for( int i = 0; i < n; i++ ) \ + c.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \ + return c; \ +} \ +template inline \ +v_reg<_Tp, n>& operator bin_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + for( int i = 0; i < n; i++ ) \ + a.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \ + return a; \ +} + +#define CV__HAL_INTRIN_IMPL_BIN_OP(bin_op) CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(CV__HAL_INTRIN_IMPL_BIN_OP_, bin_op) + +CV__HAL_INTRIN_IMPL_BIN_OP(+) +CV__HAL_INTRIN_IMPL_BIN_OP(-) +CV__HAL_INTRIN_IMPL_BIN_OP(*) +CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(CV__HAL_INTRIN_IMPL_BIN_OP_, /) + +#define CV__HAL_INTRIN_IMPL_BIT_OP_(_Tp, bit_op) \ +template CV_INLINE \ +v_reg<_Tp, n> operator bit_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ { \ v_reg<_Tp, n> c; \ typedef typename V_TypeTraits<_Tp>::int_type itype; \ @@ -423,8 +514,8 @@ template inline v_reg<_Tp, n> operator bit_op \ V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \ return c; \ } \ -template inline v_reg<_Tp, n>& operator \ - bit_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +template CV_INLINE \ +v_reg<_Tp, n>& operator bit_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ { \ typedef typename V_TypeTraits<_Tp>::int_type itype; \ for( int i = 0; i < n; i++ ) \ @@ -433,31 +524,29 @@ template inline v_reg<_Tp, n>& operator \ return a; \ } -/** @brief Bitwise AND - -Only for integer types. */ -OPENCV_HAL_IMPL_BIT_OP(&) +#define CV__HAL_INTRIN_IMPL_BIT_OP(bit_op) \ +CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op) \ +CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op) /* TODO: FIXIT remove this after masks refactoring */ -/** @brief Bitwise OR -Only for integer types. */ -OPENCV_HAL_IMPL_BIT_OP(|) +CV__HAL_INTRIN_IMPL_BIT_OP(&) +CV__HAL_INTRIN_IMPL_BIT_OP(|) +CV__HAL_INTRIN_IMPL_BIT_OP(^) -/** @brief Bitwise XOR +#define CV__HAL_INTRIN_IMPL_BITWISE_NOT_(_Tp, dummy) \ +template CV_INLINE \ +v_reg<_Tp, n> operator ~ (const v_reg<_Tp, n>& a) \ +{ \ + v_reg<_Tp, n> c; \ + for( int i = 0; i < n; i++ ) \ + c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int(~V_TypeTraits<_Tp>::reinterpret_int(a.s[i])); \ + return c; \ +} \ -Only for integer types.*/ -OPENCV_HAL_IMPL_BIT_OP(^) +CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(CV__HAL_INTRIN_IMPL_BITWISE_NOT_, ~) -/** @brief Bitwise NOT +#endif // !CV_DOXYGEN -Only for integer types.*/ -template inline v_reg<_Tp, n> operator ~ (const v_reg<_Tp, n>& a) -{ - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++ ) - c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int(~V_TypeTraits<_Tp>::reinterpret_int(a.s[i])); - return c; -} //! @brief Helper macro //! @ingroup core_hal_intrin_impl @@ -470,6 +559,27 @@ template inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a) return c; \ } +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(func, cfunc) \ +inline v_reg func(const v_reg& a) \ +{ \ + v_reg c; \ + for( int i = 0; i < 4; i++ ) \ + c.s[i] = cfunc(a.s[i]); \ + return c; \ +} \ +inline v_reg func(const v_reg& a) \ +{ \ + v_reg c; \ + for( int i = 0; i < 2; i++ ) \ + { \ + c.s[i] = cfunc(a.s[i]); \ + c.s[i + 2] = 0; \ + } \ + return c; \ +} + /** @brief Square root of elements Only for floating point types.*/ @@ -491,22 +601,22 @@ OPENCV_HAL_IMPL_MATH_FUNC(v_abs, (typename V_TypeTraits<_Tp>::abs_type)std::abs, /** @brief Round elements Only for floating point types.*/ -OPENCV_HAL_IMPL_MATH_FUNC(v_round, cvRound, int) +OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_round, cvRound) /** @brief Floor elements Only for floating point types.*/ -OPENCV_HAL_IMPL_MATH_FUNC(v_floor, cvFloor, int) +OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_floor, cvFloor) /** @brief Ceil elements Only for floating point types.*/ -OPENCV_HAL_IMPL_MATH_FUNC(v_ceil, cvCeil, int) +OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_ceil, cvCeil) /** @brief Truncate elements Only for floating point types.*/ -OPENCV_HAL_IMPL_MATH_FUNC(v_trunc, int, int) +OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_trunc, int) //! @brief Helper macro //! @ingroup core_hal_intrin_impl @@ -560,7 +670,7 @@ OPENCV_HAL_IMPL_MINMAX_FUNC(v_max, std::max) @code {A1 A2 A3 ...} => min(A1,A2,A3,...) @endcode -For 32-bit integer and 32-bit floating point types. */ +For all types except 64-bit integer and 64-bit floating point types. */ OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min, std::min) /** @brief Find one max value @@ -569,9 +679,45 @@ OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min, std::min) @code {A1 A2 A3 ...} => max(A1,A2,A3,...) @endcode -For 32-bit integer and 32-bit floating point types. */ +For all types except 64-bit integer and 64-bit floating point types. */ OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max, std::max) +static const unsigned char popCountTable[] = +{ + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, +}; +/** @brief Count the 1 bits in the vector lanes and return result as corresponding unsigned type + +Scheme: +@code +{A1 A2 A3 ...} => {popcount(A1), popcount(A2), popcount(A3), ...} +@endcode +For all integer types. */ +template +inline v_reg::abs_type, n> v_popcount(const v_reg<_Tp, n>& a) +{ + v_reg::abs_type, n> b = v_reg::abs_type, n>::zero(); + for (int i = 0; i < n*(int)sizeof(_Tp); i++) + b.s[i/sizeof(_Tp)] += popCountTable[v_reinterpret_as_u8(a).s[i]]; + return b; +} + + //! @cond IGNORED template inline void v_minmax( const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, @@ -628,9 +774,28 @@ OPENCV_HAL_IMPL_CMP_OP(==) For all types except 64-bit integer values. */ OPENCV_HAL_IMPL_CMP_OP(!=) +template +inline v_reg v_not_nan(const v_reg& a) +{ + typedef typename V_TypeTraits::int_type itype; + v_reg c; + for (int i = 0; i < n; i++) + c.s[i] = V_TypeTraits::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i])); + return c; +} +template +inline v_reg v_not_nan(const v_reg& a) +{ + typedef typename V_TypeTraits::int_type itype; + v_reg c; + for (int i = 0; i < n; i++) + c.s[i] = V_TypeTraits::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i])); + return c; +} + //! @brief Helper macro //! @ingroup core_hal_intrin_impl -#define OPENCV_HAL_IMPL_ADD_SUB_OP(func, bin_op, cast_op, _Tp2) \ +#define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2) \ template \ inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ { \ @@ -644,12 +809,17 @@ inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ /** @brief Add values without saturation For 8- and 16-bit integer values. */ -OPENCV_HAL_IMPL_ADD_SUB_OP(v_add_wrap, +, (_Tp), _Tp) +OPENCV_HAL_IMPL_ARITHM_OP(v_add_wrap, +, (_Tp), _Tp) /** @brief Subtract values without saturation For 8- and 16-bit integer values. */ -OPENCV_HAL_IMPL_ADD_SUB_OP(v_sub_wrap, -, (_Tp), _Tp) +OPENCV_HAL_IMPL_ARITHM_OP(v_sub_wrap, -, (_Tp), _Tp) + +/** @brief Multiply values without saturation + +For 8- and 16-bit integer values. */ +OPENCV_HAL_IMPL_ARITHM_OP(v_mul_wrap, *, (_Tp), _Tp) //! @cond IGNORED template inline T _absdiff(T a, T b) @@ -672,7 +842,7 @@ inline v_reg::abs_type, n> v_absdiff(const v_reg<_Tp, { typedef typename V_TypeTraits<_Tp>::abs_type rtype; v_reg c; - const rtype mask = std::numeric_limits<_Tp>::is_signed ? (1 << (sizeof(rtype)*8 - 1)) : 0; + const rtype mask = (rtype)(std::numeric_limits<_Tp>::is_signed ? (1 << (sizeof(rtype)*8 - 1)) : 0); for( int i = 0; i < n; i++ ) { rtype ua = a.s[i] ^ mask; @@ -704,6 +874,19 @@ inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b) return c; } +/** @brief Saturating absolute difference + +Returns \f$ saturate(|a - b|) \f$ . +For 8-, 16-bit signed integer source types. */ +template +inline v_reg<_Tp, n> v_absdiffs(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++) + c.s[i] = saturate_cast<_Tp>(std::abs(a.s[i] - b.s[i])); + return c; +} + /** @brief Inversed square root Returns \f$ 1/sqrt(a) \f$ @@ -745,11 +928,11 @@ inline v_reg<_Tp, n> v_sqr_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n> /** @brief Multiply and add -Returns \f$ a*b + c \f$ -For floating point types only. */ + Returns \f$ a*b + c \f$ + For floating point types and signed 32bit int only. */ template -inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, - const v_reg<_Tp, n>& c) +inline v_reg<_Tp, n> v_fma(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + const v_reg<_Tp, n>& c) { v_reg<_Tp, n> d; for( int i = 0; i < n; i++ ) @@ -757,20 +940,29 @@ inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, return d; } +/** @brief A synonym for v_fma */ +template +inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + const v_reg<_Tp, n>& c) +{ + return v_fma(a, b, c); +} + /** @brief Dot product of elements Multiply values in two registers and sum adjacent result pairs. + Scheme: @code {A1 A2 ...} // 16-bit x {B1 B2 ...} // 16-bit ------------- {A1B1+A2B2 ...} // 32-bit + @endcode -Implemented only for 16-bit signed source type (v_int16x8). */ template inline v_reg::w_type, n/2> - v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) { typedef typename V_TypeTraits<_Tp>::w_type w_type; v_reg c; @@ -779,6 +971,117 @@ template inline v_reg::w_type, n return c; } +/** @brief Dot product of elements + +Same as cv::v_dotprod, but add a third element to the sum of adjacent pairs. +Scheme: +@code + {A1 A2 ...} // 16-bit +x {B1 B2 ...} // 16-bit +------------- + {A1B1+A2B2+C1 ...} // 32-bit +@endcode +*/ +template inline v_reg::w_type, n/2> +v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + const v_reg::w_type, n / 2>& c) +{ + typedef typename V_TypeTraits<_Tp>::w_type w_type; + v_reg s; + for( int i = 0; i < (n/2); i++ ) + s.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1] + c.s[i]; + return s; +} + +/** @brief Fast Dot product of elements + +Same as cv::v_dotprod, but it may perform unorder sum between result pairs in some platforms, +this intrinsic can be used if the sum among all lanes is only matters +and also it should be yielding better performance on the affected platforms. + +*/ +template inline v_reg::w_type, n/2> +v_dotprod_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ return v_dotprod(a, b); } + +/** @brief Fast Dot product of elements + +Same as cv::v_dotprod_fast, but add a third element to the sum of adjacent pairs. +*/ +template inline v_reg::w_type, n/2> +v_dotprod_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + const v_reg::w_type, n / 2>& c) +{ return v_dotprod(a, b, c); } + +/** @brief Dot product of elements and expand + +Multiply values in two registers and expand the sum of adjacent result pairs. + +Scheme: +@code + {A1 A2 A3 A4 ...} // 8-bit +x {B1 B2 B3 B4 ...} // 8-bit +------------- + {A1B1+A2B2+A3B3+A4B4 ...} // 32-bit + +@endcode +*/ +template inline v_reg::q_type, n/4> +v_dotprod_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + typedef typename V_TypeTraits<_Tp>::q_type q_type; + v_reg s; + for( int i = 0; i < (n/4); i++ ) + s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] + + (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3]; + return s; +} + +/** @brief Dot product of elements + +Same as cv::v_dotprod_expand, but add a third element to the sum of adjacent pairs. +Scheme: +@code + {A1 A2 A3 A4 ...} // 8-bit +x {B1 B2 B3 B4 ...} // 8-bit +------------- + {A1B1+A2B2+A3B3+A4B4+C1 ...} // 32-bit +@endcode +*/ +template inline v_reg::q_type, n/4> +v_dotprod_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + const v_reg::q_type, n / 4>& c) +{ + typedef typename V_TypeTraits<_Tp>::q_type q_type; + v_reg s; + for( int i = 0; i < (n/4); i++ ) + s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] + + (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3] + c.s[i]; + return s; +} + +/** @brief Fast Dot product of elements and expand + +Multiply values in two registers and expand the sum of adjacent result pairs. + +Same as cv::v_dotprod_expand, but it may perform unorder sum between result pairs in some platforms, +this intrinsic can be used if the sum among all lanes is only matters +and also it should be yielding better performance on the affected platforms. + +*/ +template inline v_reg::q_type, n/4> +v_dotprod_expand_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ return v_dotprod_expand(a, b); } + +/** @brief Fast Dot product of elements + +Same as cv::v_dotprod_expand_fast, but add a third element to the sum of adjacent pairs. +*/ +template inline v_reg::q_type, n/4> +v_dotprod_expand_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + const v_reg::q_type, n / 4>& c) +{ return v_dotprod_expand(a, b, c); } + /** @brief Multiply and expand Multiply values two registers and store results in two registers with wider pack type. @@ -810,6 +1113,20 @@ template inline void v_mul_expand(const v_reg<_Tp, n>& a, c } } +/** @brief Multiply and extract high part + +Multiply values two registers and store high part of the results. +Implemented only for 16-bit source types (v_int16x8, v_uint16x8). Returns \f$ a*b >> 16 \f$ +*/ +template inline v_reg<_Tp, n> v_mul_hi(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + typedef typename V_TypeTraits<_Tp>::w_type w_type; + v_reg<_Tp, n> c; + for (int i = 0; i < n; i++) + c.s[i] = (_Tp)(((w_type)a.s[i] * b.s[i]) >> sizeof(_Tp)*8); + return c; +} + //! @cond IGNORED template inline void v_hsum(const v_reg<_Tp, n>& a, v_reg::w_type, n/2>& c) @@ -836,12 +1153,65 @@ template inline v_reg<_Tp, n> operator shift_op(const v_reg /** @brief Bitwise shift left For 16-, 32- and 64-bit integer values. */ -OPENCV_HAL_IMPL_SHIFT_OP(<<) +OPENCV_HAL_IMPL_SHIFT_OP(<< ) /** @brief Bitwise shift right For 16-, 32- and 64-bit integer values. */ -OPENCV_HAL_IMPL_SHIFT_OP(>>) +OPENCV_HAL_IMPL_SHIFT_OP(>> ) + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(suffix,opA,opB) \ +template inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a) \ +{ \ + v_reg<_Tp, n> b; \ + for (int i = 0; i < n; i++) \ + { \ + int sIndex = i opA imm; \ + if (0 <= sIndex && sIndex < n) \ + { \ + b.s[i] = a.s[sIndex]; \ + } \ + else \ + { \ + b.s[i] = 0; \ + } \ + } \ + return b; \ +} \ +template inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + v_reg<_Tp, n> c; \ + for (int i = 0; i < n; i++) \ + { \ + int aIndex = i opA imm; \ + int bIndex = i opA imm opB n; \ + if (0 <= bIndex && bIndex < n) \ + { \ + c.s[i] = b.s[bIndex]; \ + } \ + else if (0 <= aIndex && aIndex < n) \ + { \ + c.s[i] = a.s[aIndex]; \ + } \ + else \ + { \ + c.s[i] = 0; \ + } \ + } \ + return c; \ +} + +/** @brief Element shift left among vector + +For all type */ +OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(left, -, +) + +/** @brief Element shift right among vector + +For all type */ +OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(right, +, -) /** @brief Sum packed values @@ -849,7 +1219,7 @@ OPENCV_HAL_IMPL_SHIFT_OP(>>) @code {A1 A2 A3 ...} => sum{A1,A2,A3,...} @endcode -For 32-bit integer and 32-bit floating point types.*/ +*/ template inline typename V_TypeTraits<_Tp>::sum_type v_reduce_sum(const v_reg<_Tp, n>& a) { typename V_TypeTraits<_Tp>::sum_type c = a.s[0]; @@ -858,7 +1228,44 @@ template inline typename V_TypeTraits<_Tp>::sum_type v_redu return c; } +/** @brief Sums all elements of each input vector, returns the vector of sums + + Scheme: + @code + result[0] = a[0] + a[1] + a[2] + a[3] + result[1] = b[0] + b[1] + b[2] + b[3] + result[2] = c[0] + c[1] + c[2] + c[3] + result[3] = d[0] + d[1] + d[2] + d[3] + @endcode +*/ +inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d) +{ + v_float32x4 r; + r.s[0] = a.s[0] + a.s[1] + a.s[2] + a.s[3]; + r.s[1] = b.s[0] + b.s[1] + b.s[2] + b.s[3]; + r.s[2] = c.s[0] + c.s[1] + c.s[2] + c.s[3]; + r.s[3] = d.s[0] + d.s[1] + d.s[2] + d.s[3]; + return r; +} + +/** @brief Sum absolute differences of values + +Scheme: +@code +{A1 A2 A3 ...} {B1 B2 B3 ...} => sum{ABS(A1-B1),abs(A2-B2),abs(A3-B3),...} +@endcode +For all types except 64-bit types.*/ +template inline typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type v_reduce_sad(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type c = _absdiff(a.s[0], b.s[0]); + for (int i = 1; i < n; i++) + c += _absdiff(a.s[i], b.s[i]); + return c; +} + /** @brief Get negative values mask +@deprecated v_signmask depends on a lane count heavily and therefore isn't universal enough Returned value is a bit mask with bits set to 1 on places corresponding to negative packed values indexes. Example: @@ -866,7 +1273,7 @@ Returned value is a bit mask with bits set to 1 on places corresponding to negat v_int32x4 r; // set to {-1, -1, 1, 1} int mask = v_signmask(r); // mask = 3 <== 00000000 00000000 00000000 00000011 @endcode -For all types except 64-bit. */ +*/ template inline int v_signmask(const v_reg<_Tp, n>& a) { int mask = 0; @@ -875,10 +1282,27 @@ template inline int v_signmask(const v_reg<_Tp, n>& a) return mask; } +/** @brief Get first negative lane index + +Returned value is an index of first negative lane (undefined for input of all positive values) +Example: +@code{.cpp} +v_int32x4 r; // set to {0, 0, -1, -1} +int idx = v_heading_zeros(r); // idx = 2 +@endcode +*/ +template inline int v_scan_forward(const v_reg<_Tp, n>& a) +{ + for (int i = 0; i < n; i++) + if(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0) + return i; + return 0; +} + /** @brief Check if all packed values are less than zero Unsigned values will be casted to signed: `uchar 254 => char -2`. -For all types except 64-bit. */ +*/ template inline bool v_check_all(const v_reg<_Tp, n>& a) { for( int i = 0; i < n; i++ ) @@ -890,7 +1314,7 @@ template inline bool v_check_all(const v_reg<_Tp, n>& a) /** @brief Check if any of packed values is less than zero Unsigned values will be casted to signed: `uchar 254 => char -2`. -For all types except 64-bit. */ +*/ template inline bool v_check_any(const v_reg<_Tp, n>& a) { for( int i = 0; i < n; i++ ) @@ -899,13 +1323,16 @@ template inline bool v_check_any(const v_reg<_Tp, n>& a) return false; } -/** @brief Bitwise select +/** @brief Per-element select (blend operation) + +Return value will be built by combining values _a_ and _b_ using the following scheme: + result[i] = mask[i] ? a[i] : b[i]; -Return value will be built by combining values a and b using the following scheme: -If the i-th bit in _mask_ is 1 - select i-th bit from _a_ -else - select i-th bit from _b_ */ +@note: _mask_ element values are restricted to these values: +- 0: select element from _b_ +- 0xff/0xffff/etc: select element from _a_ +(fully compatible with bitwise-based operator) +*/ template inline v_reg<_Tp, n> v_select(const v_reg<_Tp, n>& mask, const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) { @@ -915,8 +1342,8 @@ template inline v_reg<_Tp, n> v_select(const v_reg<_Tp, n>& for( int i = 0; i < n; i++ ) { int_type m = Traits::reinterpret_int(mask.s[i]); - c.s[i] = Traits::reinterpret_from_int((Traits::reinterpret_int(a.s[i]) & m) - | (Traits::reinterpret_int(b.s[i]) & ~m)); + CV_DbgAssert(m == 0 || m == (~(int_type)0)); // restrict mask values: 0 or 0xff/0xffff/etc + c.s[i] = m ? a.s[i] : b.s[i]; } return c; } @@ -940,13 +1367,51 @@ template inline void v_expand(const v_reg<_Tp, n>& a, } } -//! @cond IGNORED -template inline v_reg::int_type, n> - v_reinterpret_as_int(const v_reg<_Tp, n>& a) -{ - v_reg::int_type, n> c; - for( int i = 0; i < n; i++ ) - c.s[i] = V_TypeTraits<_Tp>::reinterpret_int(a.s[i]); +/** @brief Expand lower values to the wider pack type + +Same as cv::v_expand, but return lower half of the vector. + +Scheme: +@code + int32x4 int64x2 +{A B C D} ==> {A B} +@endcode */ +template +inline v_reg::w_type, n/2> +v_expand_low(const v_reg<_Tp, n>& a) +{ + v_reg::w_type, n/2> b; + for( int i = 0; i < (n/2); i++ ) + b.s[i] = a.s[i]; + return b; +} + +/** @brief Expand higher values to the wider pack type + +Same as cv::v_expand_low, but expand higher half of the vector instead. + +Scheme: +@code + int32x4 int64x2 +{A B C D} ==> {C D} +@endcode */ +template +inline v_reg::w_type, n/2> +v_expand_high(const v_reg<_Tp, n>& a) +{ + v_reg::w_type, n/2> b; + for( int i = 0; i < (n/2); i++ ) + b.s[i] = a.s[i+(n/2)]; + return b; +} + +//! @cond IGNORED +template inline v_reg::int_type, n> + v_reinterpret_as_int(const v_reg<_Tp, n>& a) +{ + v_reg::int_type, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = V_TypeTraits<_Tp>::reinterpret_int(a.s[i]); return c; } @@ -993,21 +1458,52 @@ template inline void v_zip( const v_reg<_Tp, n>& a0, const @return register object @note Returned type will be detected from passed pointer type, for example uchar ==> cv::v_uint8x16, int ==> cv::v_int32x4, etc. + +@note Alignment requirement: +if CV_STRONG_ALIGNMENT=1 then passed pointer must be aligned (`sizeof(lane type)` should be enough). +Do not cast pointer types without runtime check for pointer alignment (like `uchar*` => `int*`). */ template -inline v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> v_load(const _Tp* ptr) +inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load(const _Tp* ptr) { - return v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes>(ptr); +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + return v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128>(ptr); } /** @brief Load register contents from memory (aligned) -similar to cv::v_load, but source memory block should be aligned (to 16-byte boundary) +similar to cv::v_load, but source memory block should be aligned (to 16-byte boundary in case of SIMD128, 32-byte - SIMD256, etc) + */ +template +inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_aligned(const _Tp* ptr) +{ + CV_Assert(isAligned::nlanes128>)>(ptr)); + return v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128>(ptr); +} + +/** @brief Load 64-bits of data to lower part (high part is undefined). + +@param ptr memory block containing data for first half (0..n/2) + +@code{.cpp} +int lo[2] = { 1, 2 }; +v_int32x4 r = v_load_low(lo); +@endcode */ template -inline v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> v_load_aligned(const _Tp* ptr) +inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_low(const _Tp* ptr) { - return v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes>(ptr); +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; + for( int i = 0; i < c.nlanes/2; i++ ) + { + c.s[i] = ptr[i]; + } + return c; } /** @brief Load register contents from two memory blocks @@ -1021,9 +1517,13 @@ v_int32x4 r = v_load_halves(lo, hi); @endcode */ template -inline v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> v_load_halves(const _Tp* loptr, const _Tp* hiptr) +inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_halves(const _Tp* loptr, const _Tp* hiptr) { - v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> c; +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(loptr)); + CV_Assert(isAligned(hiptr)); +#endif + v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; for( int i = 0; i < c.nlanes/2; i++ ) { c.s[i] = loptr[i]; @@ -1042,11 +1542,14 @@ v_int32x4 r = v_load_expand(buf); // r = {1, 2, 3, 4} - type is int32 @endcode For 8-, 16-, 32-bit integer source types. */ template -inline v_reg::w_type, V_SIMD128Traits<_Tp>::nlanes / 2> +inline v_reg::w_type, V_TypeTraits<_Tp>::nlanes128 / 2> v_load_expand(const _Tp* ptr) { +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif typedef typename V_TypeTraits<_Tp>::w_type w_type; - v_reg::nlanes> c; + v_reg::nlanes128> c; for( int i = 0; i < c.nlanes; i++ ) { c.s[i] = ptr[i]; @@ -1063,11 +1566,14 @@ v_int32x4 r = v_load_q(buf); // r = {1, 2, 3, 4} - type is int32 @endcode For 8-bit integer source types. */ template -inline v_reg::q_type, V_SIMD128Traits<_Tp>::nlanes / 4> +inline v_reg::q_type, V_TypeTraits<_Tp>::nlanes128 / 4> v_load_expand_q(const _Tp* ptr) { +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif typedef typename V_TypeTraits<_Tp>::q_type q_type; - v_reg::nlanes> c; + v_reg::nlanes128> c; for( int i = 0; i < c.nlanes; i++ ) { c.s[i] = ptr[i]; @@ -1075,17 +1581,42 @@ v_load_expand_q(const _Tp* ptr) return c; } -/** @brief Load and deinterleave (4 channels) +/** @brief Load and deinterleave (2 channels) -Load data from memory deinterleave and store to 4 registers. +Load data from memory deinterleave and store to 2 registers. Scheme: @code -{A1 B1 C1 D1 A2 B2 C2 D2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...} +{A1 B1 A2 B2 ...} ==> {A1 A2 ...}, {B1 B2 ...} +@endcode +For all types except 64-bit. */ +template inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, + v_reg<_Tp, n>& b) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + int i, i2; + for( i = i2 = 0; i < n; i++, i2 += 2 ) + { + a.s[i] = ptr[i2]; + b.s[i] = ptr[i2+1]; + } +} + +/** @brief Load and deinterleave (3 channels) + +Load data from memory deinterleave and store to 3 registers. +Scheme: +@code +{A1 B1 C1 A2 B2 C2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...} @endcode For all types except 64-bit. */ template inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, v_reg<_Tp, n>& b, v_reg<_Tp, n>& c) { +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif int i, i3; for( i = i3 = 0; i < n; i++, i3 += 3 ) { @@ -1095,12 +1626,12 @@ template inline void v_load_deinterleave(const _Tp* ptr, v_ } } -/** @brief Load and deinterleave (3 channels) +/** @brief Load and deinterleave (4 channels) -Load data from memory deinterleave and store to 3 registers. +Load data from memory deinterleave and store to 4 registers. Scheme: @code -{A1 B1 C1 A2 B2 C2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...} +{A1 B1 C1 D1 A2 B2 C2 D2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...} @endcode For all types except 64-bit. */ template @@ -1108,6 +1639,9 @@ inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, v_reg<_Tp, n>& b, v_reg<_Tp, n>& c, v_reg<_Tp, n>& d) { +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif int i, i4; for( i = i4 = 0; i < n; i++, i4 += 4 ) { @@ -1118,18 +1652,46 @@ inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, } } +/** @brief Interleave and store (2 channels) + +Interleave and store data from 2 registers to memory. +Scheme: +@code +{A1 A2 ...}, {B1 B2 ...} ==> {A1 B1 A2 B2 ...} +@endcode +For all types except 64-bit. */ +template +inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, + const v_reg<_Tp, n>& b, + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + int i, i2; + for( i = i2 = 0; i < n; i++, i2 += 2 ) + { + ptr[i2] = a.s[i]; + ptr[i2+1] = b.s[i]; + } +} + /** @brief Interleave and store (3 channels) Interleave and store data from 3 registers to memory. Scheme: @code -{A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...} ==> {A1 B1 C1 D1 A2 B2 C2 D2 ...} +{A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...} ==> {A1 B1 C1 A2 B2 C2 ...} @endcode For all types except 64-bit. */ template inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, - const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c) + const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c, + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) { +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif int i, i3; for( i = i3 = 0; i < n; i++, i3 += 3 ) { @@ -1149,8 +1711,12 @@ Interleave and store data from 4 registers to memory. For all types except 64-bit. */ template inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c, - const v_reg<_Tp, n>& d) + const v_reg<_Tp, n>& d, + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) { +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif int i, i4; for( i = i4 = 0; i < n; i++, i4 += 4 ) { @@ -1172,10 +1738,22 @@ Pointer can be unaligned. */ template inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a) { +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif for( int i = 0; i < n; i++ ) ptr[i] = a.s[i]; } +template +inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + v_store(ptr, a); +} + /** @brief Store data to memory (lower half) Store lower half of register contents to memory. @@ -1186,6 +1764,9 @@ Store lower half of register contents to memory. template inline void v_store_low(_Tp* ptr, const v_reg<_Tp, n>& a) { +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif for( int i = 0; i < (n/2); i++ ) ptr[i] = a.s[i]; } @@ -1200,6 +1781,9 @@ Store higher half of register contents to memory. template inline void v_store_high(_Tp* ptr, const v_reg<_Tp, n>& a) { +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif for( int i = 0; i < (n/2); i++ ) ptr[i] = a.s[i+(n/2)]; } @@ -1215,8 +1799,22 @@ Pointer __should__ be aligned by 16-byte boundary. */ template inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a) { - for( int i = 0; i < n; i++ ) - ptr[i] = a.s[i]; + CV_Assert(isAligned)>(ptr)); + v_store(ptr, a); +} + +template +inline void v_store_aligned_nocache(_Tp* ptr, const v_reg<_Tp, n>& a) +{ + CV_Assert(isAligned)>(ptr)); + v_store(ptr, a); +} + +template +inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/) +{ + CV_Assert(isAligned)>(ptr)); + v_store(ptr, a); } /** @brief Combine vector from first elements of two vectors @@ -1282,6 +1880,23 @@ inline void v_recombine(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, } } +/** @brief Vector reverse order + +Reverse the order of the vector +Scheme: +@code + REG {A1 ... An} ==> REG {An ... A1} +@endcode +For all types. */ +template +inline v_reg<_Tp, n> v_reverse(const v_reg<_Tp, n>& a) +{ + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = a.s[n-i-1]; + return c; +} + /** @brief Vector extract Scheme: @@ -1300,7 +1915,7 @@ Restriction: 0 <= shift < nlanes v_int32x4 a, b, c; c = v_extract<2>(a, b); @endcode -For integer types only. */ +For all types. */ template inline v_reg<_Tp, n> v_extract(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) { @@ -1314,6 +1929,42 @@ inline v_reg<_Tp, n> v_extract(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) return r; } +/** @brief Vector extract + +Scheme: +Return the s-th element of v. +Restriction: 0 <= s < nlanes + +Usage: +@code +v_int32x4 a; +int r; +r = v_extract_n<2>(a); +@endcode +For all types. */ +template +inline _Tp v_extract_n(const v_reg<_Tp, n>& v) +{ + CV_DbgAssert(s >= 0 && s < n); + return v.s[s]; +} + +/** @brief Broadcast i-th element of vector + +Scheme: +@code +{ v[0] v[1] v[2] ... v[SZ] } => { v[i], v[i], v[i] ... v[i] } +@endcode +Restriction: 0 <= i < nlanes +Supported types: 32-bit integers and floats (s32/u32/f32) + */ +template +inline v_reg<_Tp, n> v_broadcast_element(const v_reg<_Tp, n>& a) +{ + CV_DbgAssert(i >= 0 && i < n); + return v_reg<_Tp, n>::all(a.s[i]); +} + /** @brief Round Rounds each value. Input type is float vector ==> output type is int vector.*/ @@ -1325,6 +1976,18 @@ template inline v_reg v_round(const v_reg& a) return c; } +/** @overload */ +template inline v_reg v_round(const v_reg& a, const v_reg& b) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = cvRound(a.s[i]); + c.s[i+n] = cvRound(b.s[i]); + } + return c; +} + /** @brief Floor Floor each value. Input type is float vector ==> output type is int vector.*/ @@ -1417,28 +2080,241 @@ template inline v_reg v_cvt_f32(const v_reg& a) return c; } +template inline v_reg v_cvt_f32(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = (float)a.s[i]; + c.s[i+n] = 0; + } + return c; +} + +template inline v_reg v_cvt_f32(const v_reg& a, const v_reg& b) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = (float)a.s[i]; + c.s[i+n] = (float)b.s[i]; + } + return c; +} + /** @brief Convert to double Supported input type is cv::v_int32x4. */ -template inline v_reg v_cvt_f64(const v_reg& a) +CV_INLINE v_reg v_cvt_f64(const v_reg& a) { + enum { n = 2 }; v_reg c; for( int i = 0; i < n; i++ ) c.s[i] = (double)a.s[i]; return c; } +/** @brief Convert to double high part of vector + +Supported input type is cv::v_int32x4. */ +CV_INLINE v_reg v_cvt_f64_high(const v_reg& a) +{ + enum { n = 2 }; + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = (double)a.s[i + 2]; + return c; +} + /** @brief Convert to double Supported input type is cv::v_float32x4. */ -template inline v_reg v_cvt_f64(const v_reg& a) +CV_INLINE v_reg v_cvt_f64(const v_reg& a) +{ + enum { n = 2 }; + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = (double)a.s[i]; + return c; +} + +/** @brief Convert to double high part of vector + +Supported input type is cv::v_float32x4. */ +CV_INLINE v_reg v_cvt_f64_high(const v_reg& a) +{ + enum { n = 2 }; + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = (double)a.s[i + 2]; + return c; +} + +/** @brief Convert to double + +Supported input type is cv::v_int64x2. */ +CV_INLINE v_reg v_cvt_f64(const v_reg& a) { + enum { n = 2 }; v_reg c; for( int i = 0; i < n; i++ ) c.s[i] = (double)a.s[i]; return c; } +/** @brief Convert to double high part of vector + +Supported input type is cv::v_int64x2. */ +CV_INLINE v_reg v_cvt_f64_high(const v_reg& a) +{ + enum { n = 2 }; + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = (double)a.s[i]; + return c; +} + + +template inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut(const _Tp* tab, const int* idx) +{ + v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; + for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++) + c.s[i] = tab[idx[i]]; + return c; +} +template inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut_pairs(const _Tp* tab, const int* idx) +{ + v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; + for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++) + c.s[i] = tab[idx[i / 2] + i % 2]; + return c; +} +template inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut_quads(const _Tp* tab, const int* idx) +{ + v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; + for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++) + c.s[i] = tab[idx[i / 4] + i % 4]; + return c; +} + +template inline v_reg v_lut(const int* tab, const v_reg& idx) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = tab[idx.s[i]]; + return c; +} + +template inline v_reg v_lut(const unsigned* tab, const v_reg& idx) +{ + v_reg c; + for (int i = 0; i < n; i++) + c.s[i] = tab[idx.s[i]]; + return c; +} + +template inline v_reg v_lut(const float* tab, const v_reg& idx) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = tab[idx.s[i]]; + return c; +} + +template inline v_reg v_lut(const double* tab, const v_reg& idx) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = tab[idx.s[i]]; + return c; +} + + +inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) +{ + return v_lut(tab, idxvec.s); +} + +inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) +{ + return v_lut(tab, idxvec.s); +} + +inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) +{ + return v_lut(tab, idxvec.s); +} + +inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) +{ + return v_lut(tab, idxvec.s); +} + + +template inline void v_lut_deinterleave(const float* tab, const v_reg& idx, + v_reg& x, v_reg& y) +{ + for( int i = 0; i < n; i++ ) + { + int j = idx.s[i]; + x.s[i] = tab[j]; + y.s[i] = tab[j+1]; + } +} + +template inline void v_lut_deinterleave(const double* tab, const v_reg& idx, + v_reg& x, v_reg& y) +{ + for( int i = 0; i < n; i++ ) + { + int j = idx.s[i]; + x.s[i] = tab[j]; + y.s[i] = tab[j+1]; + } +} + +template inline v_reg<_Tp, n> v_interleave_pairs(const v_reg<_Tp, n>& vec) +{ + v_reg<_Tp, n> c; + for (int i = 0; i < n/4; i++) + { + c.s[4*i ] = vec.s[4*i ]; + c.s[4*i+1] = vec.s[4*i+2]; + c.s[4*i+2] = vec.s[4*i+1]; + c.s[4*i+3] = vec.s[4*i+3]; + } + return c; +} + +template inline v_reg<_Tp, n> v_interleave_quads(const v_reg<_Tp, n>& vec) +{ + v_reg<_Tp, n> c; + for (int i = 0; i < n/8; i++) + { + c.s[8*i ] = vec.s[8*i ]; + c.s[8*i+1] = vec.s[8*i+4]; + c.s[8*i+2] = vec.s[8*i+1]; + c.s[8*i+3] = vec.s[8*i+5]; + c.s[8*i+4] = vec.s[8*i+2]; + c.s[8*i+5] = vec.s[8*i+6]; + c.s[8*i+6] = vec.s[8*i+3]; + c.s[8*i+7] = vec.s[8*i+7]; + } + return c; +} + +template inline v_reg<_Tp, n> v_pack_triplets(const v_reg<_Tp, n>& vec) +{ + v_reg<_Tp, n> c; + for (int i = 0; i < n/4; i++) + { + c.s[3*i ] = vec.s[4*i ]; + c.s[3*i+1] = vec.s[4*i+1]; + c.s[3*i+2] = vec.s[4*i+2]; + } + return c; +} + /** @brief Transpose 4x4 matrix Scheme: @@ -1586,14 +2462,14 @@ OPENCV_HAL_IMPL_C_RSHIFTR(v_int64x2, int64) //! @brief Helper macro //! @ingroup core_hal_intrin_impl -#define OPENCV_HAL_IMPL_C_PACK(_Tpvec, _Tpnvec, _Tpn, pack_suffix) \ +#define OPENCV_HAL_IMPL_C_PACK(_Tpvec, _Tpnvec, _Tpn, pack_suffix, cast) \ inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ { \ _Tpnvec c; \ for( int i = 0; i < _Tpvec::nlanes; i++ ) \ { \ - c.s[i] = saturate_cast<_Tpn>(a.s[i]); \ - c.s[i+_Tpvec::nlanes] = saturate_cast<_Tpn>(b.s[i]); \ + c.s[i] = cast<_Tpn>(a.s[i]); \ + c.s[i+_Tpvec::nlanes] = cast<_Tpn>(b.s[i]); \ } \ return c; \ } @@ -1607,26 +2483,28 @@ inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ //! //! - pack: for 16-, 32- and 64-bit integer input types //! - pack_u: for 16- and 32-bit signed integer input types -OPENCV_HAL_IMPL_C_PACK(v_uint16x8, v_uint8x16, uchar, pack) -OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_int8x16, schar, pack) -OPENCV_HAL_IMPL_C_PACK(v_uint32x4, v_uint16x8, ushort, pack) -OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_int16x8, short, pack) -OPENCV_HAL_IMPL_C_PACK(v_uint64x2, v_uint32x4, unsigned, pack) -OPENCV_HAL_IMPL_C_PACK(v_int64x2, v_int32x4, int, pack) -OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_uint8x16, uchar, pack_u) -OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_uint16x8, ushort, pack_u) +//! +//! @note All variants except 64-bit use saturation. +OPENCV_HAL_IMPL_C_PACK(v_uint16x8, v_uint8x16, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_int8x16, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(v_uint32x4, v_uint16x8, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_int16x8, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(v_uint64x2, v_uint32x4, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK(v_int64x2, v_int32x4, int, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_uint8x16, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_uint16x8, ushort, pack_u, saturate_cast) //! @} //! @brief Helper macro //! @ingroup core_hal_intrin_impl -#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix) \ +#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ template inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ { \ _Tpnvec c; \ for( int i = 0; i < _Tpvec::nlanes; i++ ) \ { \ - c.s[i] = saturate_cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ - c.s[i+_Tpvec::nlanes] = saturate_cast<_Tpn>((b.s[i] + ((_Tp)1 << (n - 1))) >> n); \ + c.s[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ + c.s[i+_Tpvec::nlanes] = cast<_Tpn>((b.s[i] + ((_Tp)1 << (n - 1))) >> n); \ } \ return c; \ } @@ -1640,51 +2518,55 @@ template inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpve //! //! - pack: for 16-, 32- and 64-bit integer input types //! - pack_u: for 16- and 32-bit signed integer input types -OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint16x8, ushort, v_uint8x16, uchar, pack) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_int8x16, schar, pack) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint32x4, unsigned, v_uint16x8, ushort, pack) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_int16x8, short, pack) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint64x2, uint64, v_uint32x4, unsigned, pack) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int64x2, int64, v_int32x4, int, pack) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_uint8x16, uchar, pack_u) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_uint16x8, ushort, pack_u) +//! +//! @note All variants except 64-bit use saturation. +OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_int16x8, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_int64x2, int64, v_int32x4, int, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) //! @} //! @brief Helper macro //! @ingroup core_hal_intrin_impl -#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix) \ +#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ inline void v_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ { \ for( int i = 0; i < _Tpvec::nlanes; i++ ) \ - ptr[i] = saturate_cast<_Tpn>(a.s[i]); \ + ptr[i] = cast<_Tpn>(a.s[i]); \ } //! @name Pack and store //! @{ //! @brief Store values from the input vector into memory with pack //! -//! Values will be stored into memory with saturating conversion to narrower type. +//! Values will be stored into memory with conversion to narrower type. //! Variant with _u_ suffix converts to corresponding unsigned type. //! //! - pack: for 16-, 32- and 64-bit integer input types //! - pack_u: for 16- and 32-bit signed integer input types -OPENCV_HAL_IMPL_C_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack) -OPENCV_HAL_IMPL_C_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_int16x8, short, pack) -OPENCV_HAL_IMPL_C_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u) +//! +//! @note All variants except 64-bit use saturation. +OPENCV_HAL_IMPL_C_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) //! @} //! @brief Helper macro //! @ingroup core_hal_intrin_impl -#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix) \ +#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ template inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ { \ for( int i = 0; i < _Tpvec::nlanes; i++ ) \ - ptr[i] = saturate_cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ + ptr[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ } //! @name Pack and store with rounding shift @@ -1696,14 +2578,113 @@ template inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec //! //! - pack: for 16-, 32- and 64-bit integer input types //! - pack_u: for 16- and 32-bit signed integer input types -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_int16x8, short, pack) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u) +//! +//! @note All variants except 64-bit use saturation. +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) +//! @} + +//! @cond IGNORED +template +inline void _pack_b(_Tpm* mptr, const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + for (int i = 0; i < n; ++i) + { + mptr[i] = (_Tpm)a.s[i]; + mptr[i + n] = (_Tpm)b.s[i]; + } +} +//! @endcond + +//! @name Pack boolean values +//! @{ +//! @brief Pack boolean values from multiple vectors to one unsigned 8-bit integer vector +//! +//! @note Must provide valid boolean values to guarantee same result for all architectures. + +/** @brief +//! For 16-bit boolean values + +Scheme: +@code +a {0xFFFF 0 0 0xFFFF 0 0xFFFF 0xFFFF 0} +b {0xFFFF 0 0xFFFF 0 0 0xFFFF 0 0xFFFF} +=============== +{ + 0xFF 0 0 0xFF 0 0xFF 0xFF 0 + 0xFF 0 0xFF 0 0 0xFF 0 0xFF +} +@endcode */ + +inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) +{ + v_uint8x16 mask; + _pack_b(mask.s, a, b); + return mask; +} + +/** @overload +For 32-bit boolean values + +Scheme: +@code +a {0xFFFF.. 0 0 0xFFFF..} +b {0 0xFFFF.. 0xFFFF.. 0} +c {0xFFFF.. 0 0xFFFF.. 0} +d {0 0xFFFF.. 0 0xFFFF..} +=============== +{ + 0xFF 0 0 0xFF 0 0xFF 0xFF 0 + 0xFF 0 0xFF 0 0 0xFF 0 0xFF +} +@endcode */ + +inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, const v_uint32x4& d) +{ + v_uint8x16 mask; + _pack_b(mask.s, a, b); + _pack_b(mask.s + 8, c, d); + return mask; +} + +/** @overload +For 64-bit boolean values + +Scheme: +@code +a {0xFFFF.. 0} +b {0 0xFFFF..} +c {0xFFFF.. 0} +d {0 0xFFFF..} + +e {0xFFFF.. 0} +f {0xFFFF.. 0} +g {0 0xFFFF..} +h {0 0xFFFF..} +=============== +{ + 0xFF 0 0 0xFF 0xFF 0 0 0xFF + 0xFF 0 0xFF 0 0 0xFF 0 0xFF +} +@endcode */ +inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, + const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f, + const v_uint64x2& g, const v_uint64x2& h) +{ + v_uint8x16 mask; + _pack_b(mask.s, a, b); + _pack_b(mask.s + 4, c, d); + _pack_b(mask.s + 8, e, f); + _pack_b(mask.s + 12, g, h); + return mask; +} //! @} /** @brief Matrix multiplication @@ -1731,8 +2712,70 @@ inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + v.s[3]*m3.s[3]); } +/** @brief Matrix multiplication and add + +Scheme: +@code +{A0 A1 A2 } |V0| |D0| +{B0 B1 B2 } |V1| |D1| +{C0 C1 C2 } x |V2| + |D2| +==================== +{R0 R1 R2 R3}, where: +R0 = A0V0 + A1V1 + A2V2 + D0, +R1 = B0V0 + B1V1 + B2V2 + D1 +... +@endcode +*/ +inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& m3) +{ + return v_float32x4(v.s[0]*m0.s[0] + v.s[1]*m1.s[0] + v.s[2]*m2.s[0] + m3.s[0], + v.s[0]*m0.s[1] + v.s[1]*m1.s[1] + v.s[2]*m2.s[1] + m3.s[1], + v.s[0]*m0.s[2] + v.s[1]*m1.s[2] + v.s[2]*m2.s[2] + m3.s[2], + v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + m3.s[3]); +} + + +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b) +{ return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_cvt_f64_high(a) * v_cvt_f64_high(b)); } +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_fma(v_cvt_f64_high(a), v_cvt_f64_high(b), c)); } + +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_dotprod_expand(a, b); } +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand(a, b, c); } + +////// FP16 support /////// + +inline v_reg::nlanes128> +v_load_expand(const float16_t* ptr) +{ + v_reg::nlanes128> v; + for( int i = 0; i < v.nlanes; i++ ) + { + v.s[i] = ptr[i]; + } + return v; +} + +inline void +v_pack_store(float16_t* ptr, const v_reg::nlanes128>& v) +{ + for( int i = 0; i < v.nlanes; i++ ) + { + ptr[i] = float16_t(v.s[i]); + } +} + +inline void v_cleanup() {} + //! @} +#ifndef CV_DOXYGEN +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END +#endif } #endif diff --git a/IPL/include/opencv/opencv2/core/hal/intrin_forward.hpp b/IPL/include/opencv/opencv2/core/hal/intrin_forward.hpp new file mode 100644 index 0000000..979f15a --- /dev/null +++ b/IPL/include/opencv/opencv2/core/hal/intrin_forward.hpp @@ -0,0 +1,191 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#ifndef CV__SIMD_FORWARD +#error "Need to pre-define forward width" +#endif + +namespace cv +{ + +//! @cond IGNORED + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + +/** Types **/ +#if CV__SIMD_FORWARD == 1024 +// [todo] 1024 +#error "1024-long ops not implemented yet" +#elif CV__SIMD_FORWARD == 512 +// 512 +#define __CV_VX(fun) v512_##fun +#define __CV_V_UINT8 v_uint8x64 +#define __CV_V_INT8 v_int8x64 +#define __CV_V_UINT16 v_uint16x32 +#define __CV_V_INT16 v_int16x32 +#define __CV_V_UINT32 v_uint32x16 +#define __CV_V_INT32 v_int32x16 +#define __CV_V_UINT64 v_uint64x8 +#define __CV_V_INT64 v_int64x8 +#define __CV_V_FLOAT32 v_float32x16 +#define __CV_V_FLOAT64 v_float64x8 +struct v_uint8x64; +struct v_int8x64; +struct v_uint16x32; +struct v_int16x32; +struct v_uint32x16; +struct v_int32x16; +struct v_uint64x8; +struct v_int64x8; +struct v_float32x16; +struct v_float64x8; +#elif CV__SIMD_FORWARD == 256 +// 256 +#define __CV_VX(fun) v256_##fun +#define __CV_V_UINT8 v_uint8x32 +#define __CV_V_INT8 v_int8x32 +#define __CV_V_UINT16 v_uint16x16 +#define __CV_V_INT16 v_int16x16 +#define __CV_V_UINT32 v_uint32x8 +#define __CV_V_INT32 v_int32x8 +#define __CV_V_UINT64 v_uint64x4 +#define __CV_V_INT64 v_int64x4 +#define __CV_V_FLOAT32 v_float32x8 +#define __CV_V_FLOAT64 v_float64x4 +struct v_uint8x32; +struct v_int8x32; +struct v_uint16x16; +struct v_int16x16; +struct v_uint32x8; +struct v_int32x8; +struct v_uint64x4; +struct v_int64x4; +struct v_float32x8; +struct v_float64x4; +#else +// 128 +#define __CV_VX(fun) v_##fun +#define __CV_V_UINT8 v_uint8x16 +#define __CV_V_INT8 v_int8x16 +#define __CV_V_UINT16 v_uint16x8 +#define __CV_V_INT16 v_int16x8 +#define __CV_V_UINT32 v_uint32x4 +#define __CV_V_INT32 v_int32x4 +#define __CV_V_UINT64 v_uint64x2 +#define __CV_V_INT64 v_int64x2 +#define __CV_V_FLOAT32 v_float32x4 +#define __CV_V_FLOAT64 v_float64x2 +struct v_uint8x16; +struct v_int8x16; +struct v_uint16x8; +struct v_int16x8; +struct v_uint32x4; +struct v_int32x4; +struct v_uint64x2; +struct v_int64x2; +struct v_float32x4; +struct v_float64x2; +#endif + +/** Value reordering **/ + +// Expansion +void v_expand(const __CV_V_UINT8&, __CV_V_UINT16&, __CV_V_UINT16&); +void v_expand(const __CV_V_INT8&, __CV_V_INT16&, __CV_V_INT16&); +void v_expand(const __CV_V_UINT16&, __CV_V_UINT32&, __CV_V_UINT32&); +void v_expand(const __CV_V_INT16&, __CV_V_INT32&, __CV_V_INT32&); +void v_expand(const __CV_V_UINT32&, __CV_V_UINT64&, __CV_V_UINT64&); +void v_expand(const __CV_V_INT32&, __CV_V_INT64&, __CV_V_INT64&); +// Low Expansion +__CV_V_UINT16 v_expand_low(const __CV_V_UINT8&); +__CV_V_INT16 v_expand_low(const __CV_V_INT8&); +__CV_V_UINT32 v_expand_low(const __CV_V_UINT16&); +__CV_V_INT32 v_expand_low(const __CV_V_INT16&); +__CV_V_UINT64 v_expand_low(const __CV_V_UINT32&); +__CV_V_INT64 v_expand_low(const __CV_V_INT32&); +// High Expansion +__CV_V_UINT16 v_expand_high(const __CV_V_UINT8&); +__CV_V_INT16 v_expand_high(const __CV_V_INT8&); +__CV_V_UINT32 v_expand_high(const __CV_V_UINT16&); +__CV_V_INT32 v_expand_high(const __CV_V_INT16&); +__CV_V_UINT64 v_expand_high(const __CV_V_UINT32&); +__CV_V_INT64 v_expand_high(const __CV_V_INT32&); +// Load & Low Expansion +__CV_V_UINT16 __CV_VX(load_expand)(const uchar*); +__CV_V_INT16 __CV_VX(load_expand)(const schar*); +__CV_V_UINT32 __CV_VX(load_expand)(const ushort*); +__CV_V_INT32 __CV_VX(load_expand)(const short*); +__CV_V_UINT64 __CV_VX(load_expand)(const uint*); +__CV_V_INT64 __CV_VX(load_expand)(const int*); +// Load lower 8-bit and expand into 32-bit +__CV_V_UINT32 __CV_VX(load_expand_q)(const uchar*); +__CV_V_INT32 __CV_VX(load_expand_q)(const schar*); + +// Saturating Pack +__CV_V_UINT8 v_pack(const __CV_V_UINT16&, const __CV_V_UINT16&); +__CV_V_INT8 v_pack(const __CV_V_INT16&, const __CV_V_INT16&); +__CV_V_UINT16 v_pack(const __CV_V_UINT32&, const __CV_V_UINT32&); +__CV_V_INT16 v_pack(const __CV_V_INT32&, const __CV_V_INT32&); +// Non-saturating Pack +__CV_V_UINT32 v_pack(const __CV_V_UINT64&, const __CV_V_UINT64&); +__CV_V_INT32 v_pack(const __CV_V_INT64&, const __CV_V_INT64&); +// Pack signed integers with unsigned saturation +__CV_V_UINT8 v_pack_u(const __CV_V_INT16&, const __CV_V_INT16&); +__CV_V_UINT16 v_pack_u(const __CV_V_INT32&, const __CV_V_INT32&); + +/** Arithmetic, bitwise and comparison operations **/ + +// Non-saturating multiply +#if CV_VSX +template +Tvec v_mul_wrap(const Tvec& a, const Tvec& b); +#else +__CV_V_UINT8 v_mul_wrap(const __CV_V_UINT8&, const __CV_V_UINT8&); +__CV_V_INT8 v_mul_wrap(const __CV_V_INT8&, const __CV_V_INT8&); +__CV_V_UINT16 v_mul_wrap(const __CV_V_UINT16&, const __CV_V_UINT16&); +__CV_V_INT16 v_mul_wrap(const __CV_V_INT16&, const __CV_V_INT16&); +#endif + +// Multiply and expand +#if CV_VSX +template +void v_mul_expand(const Tvec& a, const Tvec& b, Twvec& c, Twvec& d); +#else +void v_mul_expand(const __CV_V_UINT8&, const __CV_V_UINT8&, __CV_V_UINT16&, __CV_V_UINT16&); +void v_mul_expand(const __CV_V_INT8&, const __CV_V_INT8&, __CV_V_INT16&, __CV_V_INT16&); +void v_mul_expand(const __CV_V_UINT16&, const __CV_V_UINT16&, __CV_V_UINT32&, __CV_V_UINT32&); +void v_mul_expand(const __CV_V_INT16&, const __CV_V_INT16&, __CV_V_INT32&, __CV_V_INT32&); +void v_mul_expand(const __CV_V_UINT32&, const __CV_V_UINT32&, __CV_V_UINT64&, __CV_V_UINT64&); +void v_mul_expand(const __CV_V_INT32&, const __CV_V_INT32&, __CV_V_INT64&, __CV_V_INT64&); +#endif + +// Conversions +__CV_V_FLOAT32 v_cvt_f32(const __CV_V_INT32& a); +__CV_V_FLOAT32 v_cvt_f32(const __CV_V_FLOAT64& a); +__CV_V_FLOAT32 v_cvt_f32(const __CV_V_FLOAT64& a, const __CV_V_FLOAT64& b); +__CV_V_FLOAT64 v_cvt_f64(const __CV_V_INT32& a); +__CV_V_FLOAT64 v_cvt_f64_high(const __CV_V_INT32& a); +__CV_V_FLOAT64 v_cvt_f64(const __CV_V_FLOAT32& a); +__CV_V_FLOAT64 v_cvt_f64_high(const __CV_V_FLOAT32& a); +__CV_V_FLOAT64 v_cvt_f64(const __CV_V_INT64& a); + +/** Cleanup **/ +#undef CV__SIMD_FORWARD +#undef __CV_VX +#undef __CV_V_UINT8 +#undef __CV_V_INT8 +#undef __CV_V_UINT16 +#undef __CV_V_INT16 +#undef __CV_V_UINT32 +#undef __CV_V_INT32 +#undef __CV_V_UINT64 +#undef __CV_V_INT64 +#undef __CV_V_FLOAT32 +#undef __CV_V_FLOAT64 + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END + +//! @endcond + +} // cv:: \ No newline at end of file diff --git a/IPL/include/opencv/opencv2/core/hal/intrin_msa.hpp b/IPL/include/opencv/opencv2/core/hal/intrin_msa.hpp new file mode 100644 index 0000000..260350c --- /dev/null +++ b/IPL/include/opencv/opencv2/core/hal/intrin_msa.hpp @@ -0,0 +1,1872 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_HAL_INTRIN_MSA_HPP +#define OPENCV_HAL_INTRIN_MSA_HPP + +#include +#include "opencv2/core/utility.hpp" + +namespace cv +{ + +//! @cond IGNORED +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + +#define CV_SIMD128 1 + +//MSA implements 128-bit wide vector registers shared with the 64-bit wide floating-point unit registers. +//MSA and FPU can not be both present, unless the FPU has 64-bit floating-point registers. +#define CV_SIMD128_64F 1 + +struct v_uint8x16 +{ + typedef uchar lane_type; + enum { nlanes = 16 }; + + v_uint8x16() : val(msa_dupq_n_u8(0)) {} + explicit v_uint8x16(v16u8 v) : val(v) {} + v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7, + uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15) + { + uchar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + val = msa_ld1q_u8(v); + } + uchar get0() const + { + return msa_getq_lane_u8(val, 0); + } + + v16u8 val; +}; + +struct v_int8x16 +{ + typedef schar lane_type; + enum { nlanes = 16 }; + + v_int8x16() : val(msa_dupq_n_s8(0)) {} + explicit v_int8x16(v16i8 v) : val(v) {} + v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7, + schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15) + { + schar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + val = msa_ld1q_s8(v); + } + schar get0() const + { + return msa_getq_lane_s8(val, 0); + } + + v16i8 val; +}; + +struct v_uint16x8 +{ + typedef ushort lane_type; + enum { nlanes = 8 }; + + v_uint16x8() : val(msa_dupq_n_u16(0)) {} + explicit v_uint16x8(v8u16 v) : val(v) {} + v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7) + { + ushort v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + val = msa_ld1q_u16(v); + } + ushort get0() const + { + return msa_getq_lane_u16(val, 0); + } + + v8u16 val; +}; + +struct v_int16x8 +{ + typedef short lane_type; + enum { nlanes = 8 }; + + v_int16x8() : val(msa_dupq_n_s16(0)) {} + explicit v_int16x8(v8i16 v) : val(v) {} + v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7) + { + short v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + val = msa_ld1q_s16(v); + } + short get0() const + { + return msa_getq_lane_s16(val, 0); + } + + v8i16 val; +}; + +struct v_uint32x4 +{ + typedef unsigned int lane_type; + enum { nlanes = 4 }; + + v_uint32x4() : val(msa_dupq_n_u32(0)) {} + explicit v_uint32x4(v4u32 v) : val(v) {} + v_uint32x4(unsigned int v0, unsigned int v1, unsigned int v2, unsigned int v3) + { + unsigned int v[] = {v0, v1, v2, v3}; + val = msa_ld1q_u32(v); + } + unsigned int get0() const + { + return msa_getq_lane_u32(val, 0); + } + + v4u32 val; +}; + +struct v_int32x4 +{ + typedef int lane_type; + enum { nlanes = 4 }; + + v_int32x4() : val(msa_dupq_n_s32(0)) {} + explicit v_int32x4(v4i32 v) : val(v) {} + v_int32x4(int v0, int v1, int v2, int v3) + { + int v[] = {v0, v1, v2, v3}; + val = msa_ld1q_s32(v); + } + int get0() const + { + return msa_getq_lane_s32(val, 0); + } + v4i32 val; +}; + +struct v_float32x4 +{ + typedef float lane_type; + enum { nlanes = 4 }; + + v_float32x4() : val(msa_dupq_n_f32(0.0f)) {} + explicit v_float32x4(v4f32 v) : val(v) {} + v_float32x4(float v0, float v1, float v2, float v3) + { + float v[] = {v0, v1, v2, v3}; + val = msa_ld1q_f32(v); + } + float get0() const + { + return msa_getq_lane_f32(val, 0); + } + v4f32 val; +}; + +struct v_uint64x2 +{ + typedef uint64 lane_type; + enum { nlanes = 2 }; + + v_uint64x2() : val(msa_dupq_n_u64(0)) {} + explicit v_uint64x2(v2u64 v) : val(v) {} + v_uint64x2(uint64 v0, uint64 v1) + { + uint64 v[] = {v0, v1}; + val = msa_ld1q_u64(v); + } + uint64 get0() const + { + return msa_getq_lane_u64(val, 0); + } + v2u64 val; +}; + +struct v_int64x2 +{ + typedef int64 lane_type; + enum { nlanes = 2 }; + + v_int64x2() : val(msa_dupq_n_s64(0)) {} + explicit v_int64x2(v2i64 v) : val(v) {} + v_int64x2(int64 v0, int64 v1) + { + int64 v[] = {v0, v1}; + val = msa_ld1q_s64(v); + } + int64 get0() const + { + return msa_getq_lane_s64(val, 0); + } + v2i64 val; +}; + +struct v_float64x2 +{ + typedef double lane_type; + enum { nlanes = 2 }; + + v_float64x2() : val(msa_dupq_n_f64(0.0f)) {} + explicit v_float64x2(v2f64 v) : val(v) {} + v_float64x2(double v0, double v1) + { + double v[] = {v0, v1}; + val = msa_ld1q_f64(v); + } + double get0() const + { + return msa_getq_lane_f64(val, 0); + } + v2f64 val; +}; + +#define OPENCV_HAL_IMPL_MSA_INIT(_Tpv, _Tp, suffix) \ +inline v_##_Tpv v_setzero_##suffix() { return v_##_Tpv(msa_dupq_n_##suffix((_Tp)0)); } \ +inline v_##_Tpv v_setall_##suffix(_Tp v) { return v_##_Tpv(msa_dupq_n_##suffix(v)); } \ +inline v_uint8x16 v_reinterpret_as_u8(const v_##_Tpv& v) { return v_uint8x16(MSA_TPV_REINTERPRET(v16u8, v.val)); } \ +inline v_int8x16 v_reinterpret_as_s8(const v_##_Tpv& v) { return v_int8x16(MSA_TPV_REINTERPRET(v16i8, v.val)); } \ +inline v_uint16x8 v_reinterpret_as_u16(const v_##_Tpv& v) { return v_uint16x8(MSA_TPV_REINTERPRET(v8u16, v.val)); } \ +inline v_int16x8 v_reinterpret_as_s16(const v_##_Tpv& v) { return v_int16x8(MSA_TPV_REINTERPRET(v8i16, v.val)); } \ +inline v_uint32x4 v_reinterpret_as_u32(const v_##_Tpv& v) { return v_uint32x4(MSA_TPV_REINTERPRET(v4u32, v.val)); } \ +inline v_int32x4 v_reinterpret_as_s32(const v_##_Tpv& v) { return v_int32x4(MSA_TPV_REINTERPRET(v4i32, v.val)); } \ +inline v_uint64x2 v_reinterpret_as_u64(const v_##_Tpv& v) { return v_uint64x2(MSA_TPV_REINTERPRET(v2u64, v.val)); } \ +inline v_int64x2 v_reinterpret_as_s64(const v_##_Tpv& v) { return v_int64x2(MSA_TPV_REINTERPRET(v2i64, v.val)); } \ +inline v_float32x4 v_reinterpret_as_f32(const v_##_Tpv& v) { return v_float32x4(MSA_TPV_REINTERPRET(v4f32, v.val)); } \ +inline v_float64x2 v_reinterpret_as_f64(const v_##_Tpv& v) { return v_float64x2(MSA_TPV_REINTERPRET(v2f64, v.val)); } + +OPENCV_HAL_IMPL_MSA_INIT(uint8x16, uchar, u8) +OPENCV_HAL_IMPL_MSA_INIT(int8x16, schar, s8) +OPENCV_HAL_IMPL_MSA_INIT(uint16x8, ushort, u16) +OPENCV_HAL_IMPL_MSA_INIT(int16x8, short, s16) +OPENCV_HAL_IMPL_MSA_INIT(uint32x4, unsigned int, u32) +OPENCV_HAL_IMPL_MSA_INIT(int32x4, int, s32) +OPENCV_HAL_IMPL_MSA_INIT(uint64x2, uint64, u64) +OPENCV_HAL_IMPL_MSA_INIT(int64x2, int64, s64) +OPENCV_HAL_IMPL_MSA_INIT(float32x4, float, f32) +OPENCV_HAL_IMPL_MSA_INIT(float64x2, double, f64) + +#define OPENCV_HAL_IMPL_MSA_PACK(_Tpvec, _Tpwvec, pack, mov, rshr) \ +inline _Tpvec v_##pack(const _Tpwvec& a, const _Tpwvec& b) \ +{ \ + return _Tpvec(mov(a.val, b.val)); \ +} \ +template inline \ +_Tpvec v_rshr_##pack(const _Tpwvec& a, const _Tpwvec& b) \ +{ \ + return _Tpvec(rshr(a.val, b.val, n)); \ +} + +OPENCV_HAL_IMPL_MSA_PACK(v_uint8x16, v_uint16x8, pack, msa_qpack_u16, msa_qrpackr_u16) +OPENCV_HAL_IMPL_MSA_PACK(v_int8x16, v_int16x8, pack, msa_qpack_s16, msa_qrpackr_s16) +OPENCV_HAL_IMPL_MSA_PACK(v_uint16x8, v_uint32x4, pack, msa_qpack_u32, msa_qrpackr_u32) +OPENCV_HAL_IMPL_MSA_PACK(v_int16x8, v_int32x4, pack, msa_qpack_s32, msa_qrpackr_s32) +OPENCV_HAL_IMPL_MSA_PACK(v_uint32x4, v_uint64x2, pack, msa_pack_u64, msa_rpackr_u64) +OPENCV_HAL_IMPL_MSA_PACK(v_int32x4, v_int64x2, pack, msa_pack_s64, msa_rpackr_s64) +OPENCV_HAL_IMPL_MSA_PACK(v_uint8x16, v_int16x8, pack_u, msa_qpacku_s16, msa_qrpackru_s16) +OPENCV_HAL_IMPL_MSA_PACK(v_uint16x8, v_int32x4, pack_u, msa_qpacku_s32, msa_qrpackru_s32) + +#define OPENCV_HAL_IMPL_MSA_PACK_STORE(_Tpvec, _Tp, hreg, suffix, _Tpwvec, pack, mov, rshr) \ +inline void v_##pack##_store(_Tp* ptr, const _Tpwvec& a) \ +{ \ + hreg a1 = mov(a.val); \ + msa_st1_##suffix(ptr, a1); \ +} \ +template inline \ +void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a) \ +{ \ + hreg a1 = rshr(a.val, n); \ + msa_st1_##suffix(ptr, a1); \ +} + +OPENCV_HAL_IMPL_MSA_PACK_STORE(v_uint8x16, uchar, v8u8, u8, v_uint16x8, pack, msa_qmovn_u16, msa_qrshrn_n_u16) +OPENCV_HAL_IMPL_MSA_PACK_STORE(v_int8x16, schar, v8i8, s8, v_int16x8, pack, msa_qmovn_s16, msa_qrshrn_n_s16) +OPENCV_HAL_IMPL_MSA_PACK_STORE(v_uint16x8, ushort, v4u16, u16, v_uint32x4, pack, msa_qmovn_u32, msa_qrshrn_n_u32) +OPENCV_HAL_IMPL_MSA_PACK_STORE(v_int16x8, short, v4i16, s16, v_int32x4, pack, msa_qmovn_s32, msa_qrshrn_n_s32) +OPENCV_HAL_IMPL_MSA_PACK_STORE(v_uint32x4, unsigned, v2u32, u32, v_uint64x2, pack, msa_movn_u64, msa_rshrn_n_u64) +OPENCV_HAL_IMPL_MSA_PACK_STORE(v_int32x4, int, v2i32, s32, v_int64x2, pack, msa_movn_s64, msa_rshrn_n_s64) +OPENCV_HAL_IMPL_MSA_PACK_STORE(v_uint8x16, uchar, v8u8, u8, v_int16x8, pack_u, msa_qmovun_s16, msa_qrshrun_n_s16) +OPENCV_HAL_IMPL_MSA_PACK_STORE(v_uint16x8, ushort, v4u16, u16, v_int32x4, pack_u, msa_qmovun_s32, msa_qrshrun_n_s32) + +// pack boolean +inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) +{ + return v_uint8x16(msa_pack_u16(a.val, b.val)); +} + +inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, const v_uint32x4& d) +{ + return v_uint8x16(msa_pack_u16(msa_pack_u32(a.val, b.val), msa_pack_u32(c.val, d.val))); +} + +inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, + const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f, + const v_uint64x2& g, const v_uint64x2& h) +{ + v8u16 abcd = msa_pack_u32(msa_pack_u64(a.val, b.val), msa_pack_u64(c.val, d.val)); + v8u16 efgh = msa_pack_u32(msa_pack_u64(e.val, f.val), msa_pack_u64(g.val, h.val)); + return v_uint8x16(msa_pack_u16(abcd, efgh)); +} + +inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& m3) +{ + v4f32 v0 = v.val; + v4f32 res = msa_mulq_lane_f32(m0.val, v0, 0); + res = msa_mlaq_lane_f32(res, m1.val, v0, 1); + res = msa_mlaq_lane_f32(res, m2.val, v0, 2); + res = msa_mlaq_lane_f32(res, m3.val, v0, 3); + return v_float32x4(res); +} + +inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& a) +{ + v4f32 v0 = v.val; + v4f32 res = msa_mulq_lane_f32(m0.val, v0, 0); + res = msa_mlaq_lane_f32(res, m1.val, v0, 1); + res = msa_mlaq_lane_f32(res, m2.val, v0, 2); + res = msa_addq_f32(res, a.val); + return v_float32x4(res); +} + +#define OPENCV_HAL_IMPL_MSA_BIN_OP(bin_op, _Tpvec, intrin) \ +inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(intrin(a.val, b.val)); \ +} \ +inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \ +{ \ + a.val = intrin(a.val, b.val); \ + return a; \ +} + +OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_uint8x16, msa_qaddq_u8) +OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_uint8x16, msa_qsubq_u8) +OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_int8x16, msa_qaddq_s8) +OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_int8x16, msa_qsubq_s8) +OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_uint16x8, msa_qaddq_u16) +OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_uint16x8, msa_qsubq_u16) +OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_int16x8, msa_qaddq_s16) +OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_int16x8, msa_qsubq_s16) +OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_int32x4, msa_addq_s32) +OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_int32x4, msa_subq_s32) +OPENCV_HAL_IMPL_MSA_BIN_OP(*, v_int32x4, msa_mulq_s32) +OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_uint32x4, msa_addq_u32) +OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_uint32x4, msa_subq_u32) +OPENCV_HAL_IMPL_MSA_BIN_OP(*, v_uint32x4, msa_mulq_u32) +OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_float32x4, msa_addq_f32) +OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_float32x4, msa_subq_f32) +OPENCV_HAL_IMPL_MSA_BIN_OP(*, v_float32x4, msa_mulq_f32) +OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_int64x2, msa_addq_s64) +OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_int64x2, msa_subq_s64) +OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_uint64x2, msa_addq_u64) +OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_uint64x2, msa_subq_u64) +OPENCV_HAL_IMPL_MSA_BIN_OP(/, v_float32x4, msa_divq_f32) +OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_float64x2, msa_addq_f64) +OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_float64x2, msa_subq_f64) +OPENCV_HAL_IMPL_MSA_BIN_OP(*, v_float64x2, msa_mulq_f64) +OPENCV_HAL_IMPL_MSA_BIN_OP(/, v_float64x2, msa_divq_f64) + +// saturating multiply 8-bit, 16-bit +#define OPENCV_HAL_IMPL_MSA_MUL_SAT(_Tpvec, _Tpwvec) \ +inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \ +{ \ + _Tpwvec c, d; \ + v_mul_expand(a, b, c, d); \ + return v_pack(c, d); \ +} \ +inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \ +{a = a * b; return a; } + +OPENCV_HAL_IMPL_MSA_MUL_SAT(v_int8x16, v_int16x8) +OPENCV_HAL_IMPL_MSA_MUL_SAT(v_uint8x16, v_uint16x8) +OPENCV_HAL_IMPL_MSA_MUL_SAT(v_int16x8, v_int32x4) +OPENCV_HAL_IMPL_MSA_MUL_SAT(v_uint16x8, v_uint32x4) + +// Multiply and expand +inline void v_mul_expand(const v_int8x16& a, const v_int8x16& b, + v_int16x8& c, v_int16x8& d) +{ + v16i8 a_lo, a_hi, b_lo, b_hi; + + ILVRL_B2_SB(a.val, msa_dupq_n_s8(0), a_lo, a_hi); + ILVRL_B2_SB(b.val, msa_dupq_n_s8(0), b_lo, b_hi); + c.val = msa_mulq_s16(msa_paddlq_s8(a_lo), msa_paddlq_s8(b_lo)); + d.val = msa_mulq_s16(msa_paddlq_s8(a_hi), msa_paddlq_s8(b_hi)); +} + +inline void v_mul_expand(const v_uint8x16& a, const v_uint8x16& b, + v_uint16x8& c, v_uint16x8& d) +{ + v16u8 a_lo, a_hi, b_lo, b_hi; + + ILVRL_B2_UB(a.val, msa_dupq_n_u8(0), a_lo, a_hi); + ILVRL_B2_UB(b.val, msa_dupq_n_u8(0), b_lo, b_hi); + c.val = msa_mulq_u16(msa_paddlq_u8(a_lo), msa_paddlq_u8(b_lo)); + d.val = msa_mulq_u16(msa_paddlq_u8(a_hi), msa_paddlq_u8(b_hi)); +} + +inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b, + v_int32x4& c, v_int32x4& d) +{ + v8i16 a_lo, a_hi, b_lo, b_hi; + + ILVRL_H2_SH(a.val, msa_dupq_n_s16(0), a_lo, a_hi); + ILVRL_H2_SH(b.val, msa_dupq_n_s16(0), b_lo, b_hi); + c.val = msa_mulq_s32(msa_paddlq_s16(a_lo), msa_paddlq_s16(b_lo)); + d.val = msa_mulq_s32(msa_paddlq_s16(a_hi), msa_paddlq_s16(b_hi)); +} + +inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b, + v_uint32x4& c, v_uint32x4& d) +{ + v8u16 a_lo, a_hi, b_lo, b_hi; + + ILVRL_H2_UH(a.val, msa_dupq_n_u16(0), a_lo, a_hi); + ILVRL_H2_UH(b.val, msa_dupq_n_u16(0), b_lo, b_hi); + c.val = msa_mulq_u32(msa_paddlq_u16(a_lo), msa_paddlq_u16(b_lo)); + d.val = msa_mulq_u32(msa_paddlq_u16(a_hi), msa_paddlq_u16(b_hi)); +} + +inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b, + v_uint64x2& c, v_uint64x2& d) +{ + v4u32 a_lo, a_hi, b_lo, b_hi; + + ILVRL_W2_UW(a.val, msa_dupq_n_u32(0), a_lo, a_hi); + ILVRL_W2_UW(b.val, msa_dupq_n_u32(0), b_lo, b_hi); + c.val = msa_mulq_u64(msa_paddlq_u32(a_lo), msa_paddlq_u32(b_lo)); + d.val = msa_mulq_u64(msa_paddlq_u32(a_hi), msa_paddlq_u32(b_hi)); +} + +inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) +{ + v8i16 a_lo, a_hi, b_lo, b_hi; + + ILVRL_H2_SH(a.val, msa_dupq_n_s16(0), a_lo, a_hi); + ILVRL_H2_SH(b.val, msa_dupq_n_s16(0), b_lo, b_hi); + + return v_int16x8(msa_packr_s32(msa_mulq_s32(msa_paddlq_s16(a_lo), msa_paddlq_s16(b_lo)), + msa_mulq_s32(msa_paddlq_s16(a_hi), msa_paddlq_s16(b_hi)), 16)); +} + +inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) +{ + v8u16 a_lo, a_hi, b_lo, b_hi; + + ILVRL_H2_UH(a.val, msa_dupq_n_u16(0), a_lo, a_hi); + ILVRL_H2_UH(b.val, msa_dupq_n_u16(0), b_lo, b_hi); + + return v_uint16x8(msa_packr_u32(msa_mulq_u32(msa_paddlq_u16(a_lo), msa_paddlq_u16(b_lo)), + msa_mulq_u32(msa_paddlq_u16(a_hi), msa_paddlq_u16(b_hi)), 16)); +} + +//////// Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b) +{ return v_int32x4(msa_dotp_s_w(a.val, b.val)); } +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ return v_int32x4(msa_dpadd_s_w(c.val , a.val, b.val)); } + +// 32 >> 64 +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b) +{ return v_int64x2(msa_dotp_s_d(a.val, b.val)); } +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ return v_int64x2(msa_dpadd_s_d(c.val , a.val, b.val)); } + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b) +{ + v8u16 even_a = msa_shrq_n_u16(msa_shlq_n_u16(MSA_TPV_REINTERPRET(v8u16, a.val), 8), 8); + v8u16 odd_a = msa_shrq_n_u16(MSA_TPV_REINTERPRET(v8u16, a.val), 8); + v8u16 even_b = msa_shrq_n_u16(msa_shlq_n_u16(MSA_TPV_REINTERPRET(v8u16, b.val), 8), 8); + v8u16 odd_b = msa_shrq_n_u16(MSA_TPV_REINTERPRET(v8u16, b.val), 8); + v4u32 prod = msa_dotp_u_w(even_a, even_b); + return v_uint32x4(msa_dpadd_u_w(prod, odd_a, odd_b)); +} +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ + v8u16 even_a = msa_shrq_n_u16(msa_shlq_n_u16(MSA_TPV_REINTERPRET(v8u16, a.val), 8), 8); + v8u16 odd_a = msa_shrq_n_u16(MSA_TPV_REINTERPRET(v8u16, a.val), 8); + v8u16 even_b = msa_shrq_n_u16(msa_shlq_n_u16(MSA_TPV_REINTERPRET(v8u16, b.val), 8), 8); + v8u16 odd_b = msa_shrq_n_u16(MSA_TPV_REINTERPRET(v8u16, b.val), 8); + v4u32 prod = msa_dpadd_u_w(c.val, even_a, even_b); + return v_uint32x4(msa_dpadd_u_w(prod, odd_a, odd_b)); +} + +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b) +{ + v8i16 prod = msa_dotp_s_h(a.val, b.val); + return v_int32x4(msa_hadd_s32(prod, prod)); +} +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, + const v_int32x4& c) +{ return v_dotprod_expand(a, b) + c; } + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b) +{ + v4u32 even_a = msa_shrq_n_u32(msa_shlq_n_u32(MSA_TPV_REINTERPRET(v4u32, a.val), 16), 16); + v4u32 odd_a = msa_shrq_n_u32(MSA_TPV_REINTERPRET(v4u32, a.val), 16); + v4u32 even_b = msa_shrq_n_u32(msa_shlq_n_u32(MSA_TPV_REINTERPRET(v4u32, b.val), 16), 16); + v4u32 odd_b = msa_shrq_n_u32(MSA_TPV_REINTERPRET(v4u32, b.val), 16); + v2u64 prod = msa_dotp_u_d(even_a, even_b); + return v_uint64x2(msa_dpadd_u_d(prod, odd_a, odd_b)); +} +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, + const v_uint64x2& c) +{ + v4u32 even_a = msa_shrq_n_u32(msa_shlq_n_u32(MSA_TPV_REINTERPRET(v4u32, a.val), 16), 16); + v4u32 odd_a = msa_shrq_n_u32(MSA_TPV_REINTERPRET(v4u32, a.val), 16); + v4u32 even_b = msa_shrq_n_u32(msa_shlq_n_u32(MSA_TPV_REINTERPRET(v4u32, b.val), 16), 16); + v4u32 odd_b = msa_shrq_n_u32(MSA_TPV_REINTERPRET(v4u32, b.val), 16); + v2u64 prod = msa_dpadd_u_d(c.val, even_a, even_b); + return v_uint64x2(msa_dpadd_u_d(prod, odd_a, odd_b)); +} + +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b) +{ + v4i32 prod = msa_dotp_s_w(a.val, b.val); + return v_int64x2(msa_hadd_s64(prod, prod)); +} +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +// 32 >> 64f +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b) +{ return v_cvt_f64(v_dotprod(a, b)); } +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand(a, b) + c; } + + +//////// Fast Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b) +{ return v_dotprod(a, b); } +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ return v_dotprod(a, b, c); } + +// 32 >> 64 +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_dotprod(a, b); } +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ return v_dotprod(a, b, c); } + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b) +{ return v_dotprod_expand(a, b); } +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ return v_dotprod_expand(a, b, c); } +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b) +{ return v_dotprod_expand(a, b); } +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) +{ return v_dotprod_expand(a, b, c); } + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b) +{ return v_dotprod_expand(a, b); } +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ return v_dotprod_expand(a, b, c); } +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b) +{ return v_dotprod_expand(a, b); } +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ return v_dotprod_expand(a, b, c); } + +// 32 >> 64f +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_dotprod_expand(a, b); } +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand(a, b, c); } + +#define OPENCV_HAL_IMPL_MSA_LOGIC_OP(_Tpvec, _Tpv, suffix) \ +OPENCV_HAL_IMPL_MSA_BIN_OP(&, _Tpvec, msa_andq_##suffix) \ +OPENCV_HAL_IMPL_MSA_BIN_OP(|, _Tpvec, msa_orrq_##suffix) \ +OPENCV_HAL_IMPL_MSA_BIN_OP(^, _Tpvec, msa_eorq_##suffix) \ +inline _Tpvec operator ~ (const _Tpvec& a) \ +{ \ + return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_mvnq_u8(MSA_TPV_REINTERPRET(v16u8, a.val)))); \ +} + +OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_uint8x16, v16u8, u8) +OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_int8x16, v16i8, s8) +OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_uint16x8, v8u16, u16) +OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_int16x8, v8i16, s16) +OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_uint32x4, v4u32, u32) +OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_int32x4, v4i32, s32) +OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_uint64x2, v2u64, u64) +OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_int64x2, v2i64, s64) + +#define OPENCV_HAL_IMPL_MSA_FLT_BIT_OP(bin_op, intrin) \ +inline v_float32x4 operator bin_op (const v_float32x4& a, const v_float32x4& b) \ +{ \ + return v_float32x4(MSA_TPV_REINTERPRET(v4f32, intrin(MSA_TPV_REINTERPRET(v4i32, a.val), MSA_TPV_REINTERPRET(v4i32, b.val)))); \ +} \ +inline v_float32x4& operator bin_op##= (v_float32x4& a, const v_float32x4& b) \ +{ \ + a.val = MSA_TPV_REINTERPRET(v4f32, intrin(MSA_TPV_REINTERPRET(v4i32, a.val), MSA_TPV_REINTERPRET(v4i32, b.val))); \ + return a; \ +} + +OPENCV_HAL_IMPL_MSA_FLT_BIT_OP(&, msa_andq_s32) +OPENCV_HAL_IMPL_MSA_FLT_BIT_OP(|, msa_orrq_s32) +OPENCV_HAL_IMPL_MSA_FLT_BIT_OP(^, msa_eorq_s32) + +inline v_float32x4 operator ~ (const v_float32x4& a) +{ + return v_float32x4(MSA_TPV_REINTERPRET(v4f32, msa_mvnq_s32(MSA_TPV_REINTERPRET(v4i32, a.val)))); +} + +/* v_abs */ +#define OPENCV_HAL_IMPL_MSA_ABS(_Tpuvec, _Tpsvec, usuffix, ssuffix) \ +inline _Tpuvec v_abs(const _Tpsvec& a) \ +{ \ + return v_reinterpret_as_##usuffix(_Tpsvec(msa_absq_##ssuffix(a.val))); \ +} + +OPENCV_HAL_IMPL_MSA_ABS(v_uint8x16, v_int8x16, u8, s8) +OPENCV_HAL_IMPL_MSA_ABS(v_uint16x8, v_int16x8, u16, s16) +OPENCV_HAL_IMPL_MSA_ABS(v_uint32x4, v_int32x4, u32, s32) + +/* v_abs(float), v_sqrt, v_invsqrt */ +#define OPENCV_HAL_IMPL_MSA_BASIC_FUNC(_Tpvec, func, intrin) \ +inline _Tpvec func(const _Tpvec& a) \ +{ \ + return _Tpvec(intrin(a.val)); \ +} + +OPENCV_HAL_IMPL_MSA_BASIC_FUNC(v_float32x4, v_abs, msa_absq_f32) +OPENCV_HAL_IMPL_MSA_BASIC_FUNC(v_float64x2, v_abs, msa_absq_f64) +OPENCV_HAL_IMPL_MSA_BASIC_FUNC(v_float32x4, v_sqrt, msa_sqrtq_f32) +OPENCV_HAL_IMPL_MSA_BASIC_FUNC(v_float32x4, v_invsqrt, msa_rsqrtq_f32) +OPENCV_HAL_IMPL_MSA_BASIC_FUNC(v_float64x2, v_sqrt, msa_sqrtq_f64) +OPENCV_HAL_IMPL_MSA_BASIC_FUNC(v_float64x2, v_invsqrt, msa_rsqrtq_f64) + +#define OPENCV_HAL_IMPL_MSA_DBL_BIT_OP(bin_op, intrin) \ +inline v_float64x2 operator bin_op (const v_float64x2& a, const v_float64x2& b) \ +{ \ + return v_float64x2(MSA_TPV_REINTERPRET(v2f64, intrin(MSA_TPV_REINTERPRET(v2i64, a.val), MSA_TPV_REINTERPRET(v2i64, b.val)))); \ +} \ +inline v_float64x2& operator bin_op##= (v_float64x2& a, const v_float64x2& b) \ +{ \ + a.val = MSA_TPV_REINTERPRET(v2f64, intrin(MSA_TPV_REINTERPRET(v2i64, a.val), MSA_TPV_REINTERPRET(v2i64, b.val))); \ + return a; \ +} + +OPENCV_HAL_IMPL_MSA_DBL_BIT_OP(&, msa_andq_s64) +OPENCV_HAL_IMPL_MSA_DBL_BIT_OP(|, msa_orrq_s64) +OPENCV_HAL_IMPL_MSA_DBL_BIT_OP(^, msa_eorq_s64) + +inline v_float64x2 operator ~ (const v_float64x2& a) +{ + return v_float64x2(MSA_TPV_REINTERPRET(v2f64, msa_mvnq_s32(MSA_TPV_REINTERPRET(v4i32, a.val)))); +} + +// TODO: exp, log, sin, cos + +#define OPENCV_HAL_IMPL_MSA_BIN_FUNC(_Tpvec, func, intrin) \ +inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(intrin(a.val, b.val)); \ +} + +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint8x16, v_min, msa_minq_u8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint8x16, v_max, msa_maxq_u8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int8x16, v_min, msa_minq_s8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int8x16, v_max, msa_maxq_s8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint16x8, v_min, msa_minq_u16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint16x8, v_max, msa_maxq_u16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int16x8, v_min, msa_minq_s16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int16x8, v_max, msa_maxq_s16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint32x4, v_min, msa_minq_u32) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint32x4, v_max, msa_maxq_u32) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int32x4, v_min, msa_minq_s32) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int32x4, v_max, msa_maxq_s32) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_float32x4, v_min, msa_minq_f32) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_float32x4, v_max, msa_maxq_f32) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_float64x2, v_min, msa_minq_f64) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_float64x2, v_max, msa_maxq_f64) + +#define OPENCV_HAL_IMPL_MSA_INT_CMP_OP(_Tpvec, _Tpv, suffix, not_suffix) \ +inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_ceqq_##suffix(a.val, b.val))); } \ +inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_mvnq_##not_suffix(msa_ceqq_##suffix(a.val, b.val)))); } \ +inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_cltq_##suffix(a.val, b.val))); } \ +inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_cgtq_##suffix(a.val, b.val))); } \ +inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_cleq_##suffix(a.val, b.val))); } \ +inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_cgeq_##suffix(a.val, b.val))); } + +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_uint8x16, v16u8, u8, u8) +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_int8x16, v16i8, s8, u8) +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_uint16x8, v8u16, u16, u16) +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_int16x8, v8i16, s16, u16) +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_uint32x4, v4u32, u32, u32) +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_int32x4, v4i32, s32, u32) +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_float32x4, v4f32, f32, u32) +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_uint64x2, v2u64, u64, u64) +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_int64x2, v2i64, s64, u64) +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_float64x2, v2f64, f64, u64) + +inline v_float32x4 v_not_nan(const v_float32x4& a) +{ return v_float32x4(MSA_TPV_REINTERPRET(v4f32, msa_ceqq_f32(a.val, a.val))); } +inline v_float64x2 v_not_nan(const v_float64x2& a) +{ return v_float64x2(MSA_TPV_REINTERPRET(v2f64, msa_ceqq_f64(a.val, a.val))); } + +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint8x16, v_add_wrap, msa_addq_u8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int8x16, v_add_wrap, msa_addq_s8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint16x8, v_add_wrap, msa_addq_u16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int16x8, v_add_wrap, msa_addq_s16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint8x16, v_sub_wrap, msa_subq_u8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int8x16, v_sub_wrap, msa_subq_s8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint16x8, v_sub_wrap, msa_subq_u16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int16x8, v_sub_wrap, msa_subq_s16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint8x16, v_mul_wrap, msa_mulq_u8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int8x16, v_mul_wrap, msa_mulq_s8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint16x8, v_mul_wrap, msa_mulq_u16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int16x8, v_mul_wrap, msa_mulq_s16) + +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint8x16, v_absdiff, msa_abdq_u8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint16x8, v_absdiff, msa_abdq_u16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint32x4, v_absdiff, msa_abdq_u32) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_float32x4, v_absdiff, msa_abdq_f32) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_float64x2, v_absdiff, msa_abdq_f64) + +/** Saturating absolute difference **/ +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int8x16, v_absdiffs, msa_qabdq_s8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int16x8, v_absdiffs, msa_qabdq_s16) + +#define OPENCV_HAL_IMPL_MSA_BIN_FUNC2(_Tpvec, _Tpvec2, _Tpv, func, intrin) \ +inline _Tpvec2 func(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec2(MSA_TPV_REINTERPRET(_Tpv, intrin(a.val, b.val))); \ +} + +OPENCV_HAL_IMPL_MSA_BIN_FUNC2(v_int8x16, v_uint8x16, v16u8, v_absdiff, msa_abdq_s8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC2(v_int16x8, v_uint16x8, v8u16, v_absdiff, msa_abdq_s16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC2(v_int32x4, v_uint32x4, v4u32, v_absdiff, msa_abdq_s32) + +/* v_magnitude, v_sqr_magnitude, v_fma, v_muladd */ +inline v_float32x4 v_magnitude(const v_float32x4& a, const v_float32x4& b) +{ + v_float32x4 x(msa_mlaq_f32(msa_mulq_f32(a.val, a.val), b.val, b.val)); + return v_sqrt(x); +} + +inline v_float32x4 v_sqr_magnitude(const v_float32x4& a, const v_float32x4& b) +{ + return v_float32x4(msa_mlaq_f32(msa_mulq_f32(a.val, a.val), b.val, b.val)); +} + +inline v_float32x4 v_fma(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) +{ + return v_float32x4(msa_mlaq_f32(c.val, a.val, b.val)); +} + +inline v_int32x4 v_fma(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + return v_int32x4(msa_mlaq_s32(c.val, a.val, b.val)); +} + +inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) +{ + return v_fma(a, b, c); +} + +inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + return v_fma(a, b, c); +} + +inline v_float64x2 v_magnitude(const v_float64x2& a, const v_float64x2& b) +{ + v_float64x2 x(msa_mlaq_f64(msa_mulq_f64(a.val, a.val), b.val, b.val)); + return v_sqrt(x); +} + +inline v_float64x2 v_sqr_magnitude(const v_float64x2& a, const v_float64x2& b) +{ + return v_float64x2(msa_mlaq_f64(msa_mulq_f64(a.val, a.val), b.val, b.val)); +} + +inline v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) +{ + return v_float64x2(msa_mlaq_f64(c.val, a.val, b.val)); +} + +inline v_float64x2 v_muladd(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) +{ + return v_fma(a, b, c); +} + +// trade efficiency for convenience +#define OPENCV_HAL_IMPL_MSA_SHIFT_OP(_Tpvec, suffix, _Tps, ssuffix) \ +inline _Tpvec operator << (const _Tpvec& a, int n) \ +{ return _Tpvec(msa_shlq_##suffix(a.val, msa_dupq_n_##ssuffix((_Tps)n))); } \ +inline _Tpvec operator >> (const _Tpvec& a, int n) \ +{ return _Tpvec(msa_shrq_##suffix(a.val, msa_dupq_n_##ssuffix((_Tps)n))); } \ +template inline _Tpvec v_shl(const _Tpvec& a) \ +{ return _Tpvec(msa_shlq_n_##suffix(a.val, n)); } \ +template inline _Tpvec v_shr(const _Tpvec& a) \ +{ return _Tpvec(msa_shrq_n_##suffix(a.val, n)); } \ +template inline _Tpvec v_rshr(const _Tpvec& a) \ +{ return _Tpvec(msa_rshrq_n_##suffix(a.val, n)); } + +OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_uint8x16, u8, schar, s8) +OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_int8x16, s8, schar, s8) +OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_uint16x8, u16, short, s16) +OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_int16x8, s16, short, s16) +OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_uint32x4, u32, int, s32) +OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_int32x4, s32, int, s32) +OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_uint64x2, u64, int64, s64) +OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_int64x2, s64, int64, s64) + +/* v_rotate_right, v_rotate_left */ +#define OPENCV_HAL_IMPL_MSA_ROTATE_OP(_Tpvec, _Tpv, _Tpvs, suffix) \ +template inline _Tpvec v_rotate_right(const _Tpvec& a) \ +{ \ + return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_extq_##suffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), msa_dupq_n_##suffix(0), n))); \ +} \ +template inline _Tpvec v_rotate_left(const _Tpvec& a) \ +{ \ + return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_extq_##suffix(msa_dupq_n_##suffix(0), MSA_TPV_REINTERPRET(_Tpvs, a.val), _Tpvec::nlanes - n))); \ +} \ +template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a) \ +{ \ + return a; \ +} \ +template inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_extq_##suffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), MSA_TPV_REINTERPRET(_Tpvs, b.val), n))); \ +} \ +template inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_extq_##suffix(MSA_TPV_REINTERPRET(_Tpvs, b.val), MSA_TPV_REINTERPRET(_Tpvs, a.val), _Tpvec::nlanes - n))); \ +} \ +template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a, const _Tpvec& b) \ +{ \ + CV_UNUSED(b); \ + return a; \ +} + +OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_uint8x16, v16u8, v16i8, s8) +OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_int8x16, v16i8, v16i8, s8) +OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_uint16x8, v8u16, v8i16, s16) +OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_int16x8, v8i16, v8i16, s16) +OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_uint32x4, v4u32, v4i32, s32) +OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_int32x4, v4i32, v4i32, s32) +OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_float32x4, v4f32, v4i32, s32) +OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_uint64x2, v2u64, v2i64, s64) +OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_int64x2, v2i64, v2i64, s64) +OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_float64x2, v2f64, v2i64, s64) + +#define OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(_Tpvec, _Tp, suffix) \ +inline _Tpvec v_load(const _Tp* ptr) \ +{ return _Tpvec(msa_ld1q_##suffix(ptr)); } \ +inline _Tpvec v_load_aligned(const _Tp* ptr) \ +{ return _Tpvec(msa_ld1q_##suffix(ptr)); } \ +inline _Tpvec v_load_low(const _Tp* ptr) \ +{ return _Tpvec(msa_combine_##suffix(msa_ld1_##suffix(ptr), msa_dup_n_##suffix((_Tp)0))); } \ +inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ +{ return _Tpvec(msa_combine_##suffix(msa_ld1_##suffix(ptr0), msa_ld1_##suffix(ptr1))); } \ +inline void v_store(_Tp* ptr, const _Tpvec& a) \ +{ msa_st1q_##suffix(ptr, a.val); } \ +inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ +{ msa_st1q_##suffix(ptr, a.val); } \ +inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ +{ msa_st1q_##suffix(ptr, a.val); } \ +inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) \ +{ msa_st1q_##suffix(ptr, a.val); } \ +inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ +{ \ + int n = _Tpvec::nlanes; \ + for( int i = 0; i < (n/2); i++ ) \ + ptr[i] = a.val[i]; \ +} \ +inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ +{ \ + int n = _Tpvec::nlanes; \ + for( int i = 0; i < (n/2); i++ ) \ + ptr[i] = a.val[i+(n/2)]; \ +} + +OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_uint8x16, uchar, u8) +OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_int8x16, schar, s8) +OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_int16x8, short, s16) +OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_int32x4, int, s32) +OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_int64x2, int64, s64) +OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_float32x4, float, f32) +OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_float64x2, double, f64) + + +/** Reverse **/ +inline v_uint8x16 v_reverse(const v_uint8x16 &a) +{ + v_uint8x16 c = v_uint8x16((v16u8)__builtin_msa_vshf_b((v16i8)((v2i64){0x08090A0B0C0D0E0F, 0x0001020304050607}), msa_dupq_n_s8(0), (v16i8)a.val)); + return c; +} + +inline v_int8x16 v_reverse(const v_int8x16 &a) +{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); } + +inline v_uint16x8 v_reverse(const v_uint16x8 &a) +{ + v_uint16x8 c = v_uint16x8((v8u16)__builtin_msa_vshf_h((v8i16)((v2i64){0x0004000500060007, 0x0000000100020003}), msa_dupq_n_s16(0), (v8i16)a.val)); + return c; +} + +inline v_int16x8 v_reverse(const v_int16x8 &a) +{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } + +inline v_uint32x4 v_reverse(const v_uint32x4 &a) +{ + v_uint32x4 c; + c.val[0] = a.val[3]; + c.val[1] = a.val[2]; + c.val[2] = a.val[1]; + c.val[3] = a.val[0]; + return c; +} + +inline v_int32x4 v_reverse(const v_int32x4 &a) +{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_float32x4 v_reverse(const v_float32x4 &a) +{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_uint64x2 v_reverse(const v_uint64x2 &a) +{ + v_uint64x2 c; + c.val[0] = a.val[1]; + c.val[1] = a.val[0]; + return c; +} + +inline v_int64x2 v_reverse(const v_int64x2 &a) +{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } + +inline v_float64x2 v_reverse(const v_float64x2 &a) +{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } + + +#define OPENCV_HAL_IMPL_MSA_REDUCE_OP_8U(func, cfunc) \ +inline unsigned short v_reduce_##func(const v_uint16x8& a) \ +{ \ + v8u16 a_lo, a_hi; \ + ILVRL_H2_UH(a.val, msa_dupq_n_u16(0), a_lo, a_hi); \ + v4u32 b = msa_##func##q_u32(msa_paddlq_u16(a_lo), msa_paddlq_u16(a_hi)); \ + v4u32 b_lo, b_hi; \ + ILVRL_W2_UW(b, msa_dupq_n_u32(0), b_lo, b_hi); \ + v2u64 c = msa_##func##q_u64(msa_paddlq_u32(b_lo), msa_paddlq_u32(b_hi)); \ + return (unsigned short)cfunc(c[0], c[1]); \ +} + +OPENCV_HAL_IMPL_MSA_REDUCE_OP_8U(max, std::max) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_8U(min, std::min) + +#define OPENCV_HAL_IMPL_MSA_REDUCE_OP_8S(func, cfunc) \ +inline short v_reduce_##func(const v_int16x8& a) \ +{ \ + v8i16 a_lo, a_hi; \ + ILVRL_H2_SH(a.val, msa_dupq_n_s16(0), a_lo, a_hi); \ + v4i32 b = msa_##func##q_s32(msa_paddlq_s16(a_lo), msa_paddlq_s16(a_hi)); \ + v4i32 b_lo, b_hi; \ + ILVRL_W2_SW(b, msa_dupq_n_s32(0), b_lo, b_hi); \ + v2i64 c = msa_##func##q_s64(msa_paddlq_s32(b_lo), msa_paddlq_s32(b_hi)); \ + return (short)cfunc(c[0], c[1]); \ +} + +OPENCV_HAL_IMPL_MSA_REDUCE_OP_8S(max, std::max) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_8S(min, std::min) + +#define OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(_Tpvec, scalartype, func, cfunc) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + return (scalartype)cfunc(cfunc(a.val[0], a.val[1]), cfunc(a.val[2], a.val[3])); \ +} + +OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(v_uint32x4, unsigned, max, std::max) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(v_uint32x4, unsigned, min, std::min) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(v_int32x4, int, max, std::max) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(v_int32x4, int, min, std::min) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(v_float32x4, float, max, std::max) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(v_float32x4, float, min, std::min) + + +#define OPENCV_HAL_IMPL_MSA_REDUCE_OP_16(_Tpvec, scalartype, _Tpvec2, func) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + _Tpvec2 a1, a2; \ + v_expand(a, a1, a2); \ + return (scalartype)v_reduce_##func(v_##func(a1, a2)); \ +} + +OPENCV_HAL_IMPL_MSA_REDUCE_OP_16(v_uint8x16, uchar, v_uint16x8, min) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_16(v_uint8x16, uchar, v_uint16x8, max) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_16(v_int8x16, char, v_int16x8, min) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_16(v_int8x16, char, v_int16x8, max) + + + +#define OPENCV_HAL_IMPL_MSA_REDUCE_SUM(_Tpvec, scalartype, suffix) \ +inline scalartype v_reduce_sum(const _Tpvec& a) \ +{ \ + return (scalartype)msa_sum_##suffix(a.val); \ +} + +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint8x16, unsigned char, u8) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int8x16, char, s8) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint16x8, unsigned short, u16) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int16x8, short, s16) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int32x4, int, s32) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_float32x4, float, f32) + +inline uint64 v_reduce_sum(const v_uint64x2& a) +{ return (uint64)(msa_getq_lane_u64(a.val, 0) + msa_getq_lane_u64(a.val, 1)); } +inline int64 v_reduce_sum(const v_int64x2& a) +{ return (int64)(msa_getq_lane_s64(a.val, 0) + msa_getq_lane_s64(a.val, 1)); } +inline double v_reduce_sum(const v_float64x2& a) +{ + return msa_getq_lane_f64(a.val, 0) + msa_getq_lane_f64(a.val, 1); +} + +/* v_reduce_sum4, v_reduce_sad */ +inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d) +{ + v4f32 u0 = msa_addq_f32(MSA_TPV_REINTERPRET(v4f32, msa_ilvevq_s32(MSA_TPV_REINTERPRET(v4i32, b.val), MSA_TPV_REINTERPRET(v4i32, a.val))), + MSA_TPV_REINTERPRET(v4f32, msa_ilvodq_s32(MSA_TPV_REINTERPRET(v4i32, b.val), MSA_TPV_REINTERPRET(v4i32, a.val)))); // a0+a1 b0+b1 a2+a3 b2+b3 + v4f32 u1 = msa_addq_f32(MSA_TPV_REINTERPRET(v4f32, msa_ilvevq_s32(MSA_TPV_REINTERPRET(v4i32, d.val), MSA_TPV_REINTERPRET(v4i32, c.val))), + MSA_TPV_REINTERPRET(v4f32, msa_ilvodq_s32(MSA_TPV_REINTERPRET(v4i32, d.val), MSA_TPV_REINTERPRET(v4i32, c.val)))); // c0+c1 d0+d1 c2+c3 d2+d3 + + return v_float32x4(msa_addq_f32(MSA_TPV_REINTERPRET(v4f32, msa_ilvrq_s64(MSA_TPV_REINTERPRET(v2i64, u1), MSA_TPV_REINTERPRET(v2i64, u0))), + MSA_TPV_REINTERPRET(v4f32, msa_ilvlq_s64(MSA_TPV_REINTERPRET(v2i64, u1), MSA_TPV_REINTERPRET(v2i64, u0))))); +} + +inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b) +{ + v16u8 t0 = msa_abdq_u8(a.val, b.val); + v8u16 t1 = msa_paddlq_u8(t0); + v4u32 t2 = msa_paddlq_u16(t1); + return msa_sum_u32(t2); +} +inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b) +{ + v16u8 t0 = MSA_TPV_REINTERPRET(v16u8, msa_abdq_s8(a.val, b.val)); + v8u16 t1 = msa_paddlq_u8(t0); + v4u32 t2 = msa_paddlq_u16(t1); + return msa_sum_u32(t2); +} +inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b) +{ + v8u16 t0 = msa_abdq_u16(a.val, b.val); + v4u32 t1 = msa_paddlq_u16(t0); + return msa_sum_u32(t1); +} +inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b) +{ + v8u16 t0 = MSA_TPV_REINTERPRET(v8u16, msa_abdq_s16(a.val, b.val)); + v4u32 t1 = msa_paddlq_u16(t0); + return msa_sum_u32(t1); +} +inline unsigned v_reduce_sad(const v_uint32x4& a, const v_uint32x4& b) +{ + v4u32 t0 = msa_abdq_u32(a.val, b.val); + return msa_sum_u32(t0); +} +inline unsigned v_reduce_sad(const v_int32x4& a, const v_int32x4& b) +{ + v4u32 t0 = MSA_TPV_REINTERPRET(v4u32, msa_abdq_s32(a.val, b.val)); + return msa_sum_u32(t0); +} +inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b) +{ + v4f32 t0 = msa_abdq_f32(a.val, b.val); + return msa_sum_f32(t0); +} + +/* v_popcount */ +#define OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE8(_Tpvec) \ +inline v_uint8x16 v_popcount(const _Tpvec& a) \ +{ \ + v16u8 t = MSA_TPV_REINTERPRET(v16u8, msa_cntq_s8(MSA_TPV_REINTERPRET(v16i8, a.val))); \ + return v_uint8x16(t); \ +} +OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE8(v_uint8x16) +OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE8(v_int8x16) + +#define OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE16(_Tpvec) \ +inline v_uint16x8 v_popcount(const _Tpvec& a) \ +{ \ + v8u16 t = MSA_TPV_REINTERPRET(v8u16, msa_cntq_s16(MSA_TPV_REINTERPRET(v8i16, a.val))); \ + return v_uint16x8(t); \ +} +OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE16(v_uint16x8) +OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE16(v_int16x8) + +#define OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE32(_Tpvec) \ +inline v_uint32x4 v_popcount(const _Tpvec& a) \ +{ \ + v4u32 t = MSA_TPV_REINTERPRET(v4u32, msa_cntq_s32(MSA_TPV_REINTERPRET(v4i32, a.val))); \ + return v_uint32x4(t); \ +} +OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE32(v_uint32x4) +OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE32(v_int32x4) + +#define OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE64(_Tpvec) \ +inline v_uint64x2 v_popcount(const _Tpvec& a) \ +{ \ + v2u64 t = MSA_TPV_REINTERPRET(v2u64, msa_cntq_s64(MSA_TPV_REINTERPRET(v2i64, a.val))); \ + return v_uint64x2(t); \ +} +OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE64(v_uint64x2) +OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE64(v_int64x2) + +inline int v_signmask(const v_uint8x16& a) +{ + v8i8 m0 = msa_create_s8(CV_BIG_UINT(0x0706050403020100)); + v16u8 v0 = msa_shlq_u8(msa_shrq_n_u8(a.val, 7), msa_combine_s8(m0, m0)); + v8u16 v1 = msa_paddlq_u8(v0); + v4u32 v2 = msa_paddlq_u16(v1); + v2u64 v3 = msa_paddlq_u32(v2); + return (int)msa_getq_lane_u64(v3, 0) + ((int)msa_getq_lane_u64(v3, 1) << 8); +} +inline int v_signmask(const v_int8x16& a) +{ return v_signmask(v_reinterpret_as_u8(a)); } + +inline int v_signmask(const v_uint16x8& a) +{ + v4i16 m0 = msa_create_s16(CV_BIG_UINT(0x0003000200010000)); + v8u16 v0 = msa_shlq_u16(msa_shrq_n_u16(a.val, 15), msa_combine_s16(m0, m0)); + v4u32 v1 = msa_paddlq_u16(v0); + v2u64 v2 = msa_paddlq_u32(v1); + return (int)msa_getq_lane_u64(v2, 0) + ((int)msa_getq_lane_u64(v2, 1) << 4); +} +inline int v_signmask(const v_int16x8& a) +{ return v_signmask(v_reinterpret_as_u16(a)); } + +inline int v_signmask(const v_uint32x4& a) +{ + v2i32 m0 = msa_create_s32(CV_BIG_UINT(0x0000000100000000)); + v4u32 v0 = msa_shlq_u32(msa_shrq_n_u32(a.val, 31), msa_combine_s32(m0, m0)); + v2u64 v1 = msa_paddlq_u32(v0); + return (int)msa_getq_lane_u64(v1, 0) + ((int)msa_getq_lane_u64(v1, 1) << 2); +} +inline int v_signmask(const v_int32x4& a) +{ return v_signmask(v_reinterpret_as_u32(a)); } +inline int v_signmask(const v_float32x4& a) +{ return v_signmask(v_reinterpret_as_u32(a)); } + +inline int v_signmask(const v_uint64x2& a) +{ + v2u64 v0 = msa_shrq_n_u64(a.val, 63); + return (int)msa_getq_lane_u64(v0, 0) + ((int)msa_getq_lane_u64(v0, 1) << 1); +} +inline int v_signmask(const v_int64x2& a) +{ return v_signmask(v_reinterpret_as_u64(a)); } +inline int v_signmask(const v_float64x2& a) +{ return v_signmask(v_reinterpret_as_u64(a)); } + +inline int v_scan_forward(const v_int8x16& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint8x16& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_int16x8& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint16x8& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_int32x4& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint32x4& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_float32x4& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_int64x2& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint64x2& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_float64x2& a) { return trailingZeros32(v_signmask(a)); } + +#define OPENCV_HAL_IMPL_MSA_CHECK_ALLANY(_Tpvec, _Tpvec2, suffix, shift) \ +inline bool v_check_all(const v_##_Tpvec& a) \ +{ \ + _Tpvec2 v0 = msa_shrq_n_##suffix(msa_mvnq_##suffix(a.val), shift); \ + v2u64 v1 = MSA_TPV_REINTERPRET(v2u64, v0); \ + return (msa_getq_lane_u64(v1, 0) | msa_getq_lane_u64(v1, 1)) == 0; \ +} \ +inline bool v_check_any(const v_##_Tpvec& a) \ +{ \ + _Tpvec2 v0 = msa_shrq_n_##suffix(a.val, shift); \ + v2u64 v1 = MSA_TPV_REINTERPRET(v2u64, v0); \ + return (msa_getq_lane_u64(v1, 0) | msa_getq_lane_u64(v1, 1)) != 0; \ +} + +OPENCV_HAL_IMPL_MSA_CHECK_ALLANY(uint8x16, v16u8, u8, 7) +OPENCV_HAL_IMPL_MSA_CHECK_ALLANY(uint16x8, v8u16, u16, 15) +OPENCV_HAL_IMPL_MSA_CHECK_ALLANY(uint32x4, v4u32, u32, 31) +OPENCV_HAL_IMPL_MSA_CHECK_ALLANY(uint64x2, v2u64, u64, 63) + +inline bool v_check_all(const v_int8x16& a) +{ return v_check_all(v_reinterpret_as_u8(a)); } +inline bool v_check_all(const v_int16x8& a) +{ return v_check_all(v_reinterpret_as_u16(a)); } +inline bool v_check_all(const v_int32x4& a) +{ return v_check_all(v_reinterpret_as_u32(a)); } +inline bool v_check_all(const v_float32x4& a) +{ return v_check_all(v_reinterpret_as_u32(a)); } + +inline bool v_check_any(const v_int8x16& a) +{ return v_check_any(v_reinterpret_as_u8(a)); } +inline bool v_check_any(const v_int16x8& a) +{ return v_check_any(v_reinterpret_as_u16(a)); } +inline bool v_check_any(const v_int32x4& a) +{ return v_check_any(v_reinterpret_as_u32(a)); } +inline bool v_check_any(const v_float32x4& a) +{ return v_check_any(v_reinterpret_as_u32(a)); } + +inline bool v_check_all(const v_int64x2& a) +{ return v_check_all(v_reinterpret_as_u64(a)); } +inline bool v_check_all(const v_float64x2& a) +{ return v_check_all(v_reinterpret_as_u64(a)); } +inline bool v_check_any(const v_int64x2& a) +{ return v_check_any(v_reinterpret_as_u64(a)); } +inline bool v_check_any(const v_float64x2& a) +{ return v_check_any(v_reinterpret_as_u64(a)); } + +/* v_select */ +#define OPENCV_HAL_IMPL_MSA_SELECT(_Tpvec, _Tpv, _Tpvu) \ +inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_bslq_u8(MSA_TPV_REINTERPRET(_Tpvu, mask.val), \ + MSA_TPV_REINTERPRET(_Tpvu, b.val), MSA_TPV_REINTERPRET(_Tpvu, a.val)))); \ +} + +OPENCV_HAL_IMPL_MSA_SELECT(v_uint8x16, v16u8, v16u8) +OPENCV_HAL_IMPL_MSA_SELECT(v_int8x16, v16i8, v16u8) +OPENCV_HAL_IMPL_MSA_SELECT(v_uint16x8, v8u16, v16u8) +OPENCV_HAL_IMPL_MSA_SELECT(v_int16x8, v8i16, v16u8) +OPENCV_HAL_IMPL_MSA_SELECT(v_uint32x4, v4u32, v16u8) +OPENCV_HAL_IMPL_MSA_SELECT(v_int32x4, v4i32, v16u8) +OPENCV_HAL_IMPL_MSA_SELECT(v_float32x4, v4f32, v16u8) +OPENCV_HAL_IMPL_MSA_SELECT(v_float64x2, v2f64, v16u8) + +#define OPENCV_HAL_IMPL_MSA_EXPAND(_Tpvec, _Tpwvec, _Tp, suffix, ssuffix, _Tpv, _Tpvs) \ +inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ +{ \ + _Tpv a_lo = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), msa_dupq_n_##ssuffix(0))); \ + _Tpv a_hi = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), msa_dupq_n_##ssuffix(0))); \ + b0.val = msa_paddlq_##suffix(a_lo); \ + b1.val = msa_paddlq_##suffix(a_hi); \ +} \ +inline _Tpwvec v_expand_low(const _Tpvec& a) \ +{ \ + _Tpv a_lo = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), msa_dupq_n_##ssuffix(0))); \ + return _Tpwvec(msa_paddlq_##suffix(a_lo)); \ +} \ +inline _Tpwvec v_expand_high(const _Tpvec& a) \ +{ \ + _Tpv a_hi = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), msa_dupq_n_##ssuffix(0))); \ + return _Tpwvec(msa_paddlq_##suffix(a_hi)); \ +} \ +inline _Tpwvec v_load_expand(const _Tp* ptr) \ +{ \ + return _Tpwvec(msa_movl_##suffix(msa_ld1_##suffix(ptr))); \ +} + +OPENCV_HAL_IMPL_MSA_EXPAND(v_uint8x16, v_uint16x8, uchar, u8, s8, v16u8, v16i8) +OPENCV_HAL_IMPL_MSA_EXPAND(v_int8x16, v_int16x8, schar, s8, s8, v16i8, v16i8) +OPENCV_HAL_IMPL_MSA_EXPAND(v_uint16x8, v_uint32x4, ushort, u16, s16, v8u16, v8i16) +OPENCV_HAL_IMPL_MSA_EXPAND(v_int16x8, v_int32x4, short, s16, s16, v8i16, v8i16) +OPENCV_HAL_IMPL_MSA_EXPAND(v_uint32x4, v_uint64x2, uint, u32, s32, v4u32, v4i32) +OPENCV_HAL_IMPL_MSA_EXPAND(v_int32x4, v_int64x2, int, s32, s32, v4i32, v4i32) + +inline v_uint32x4 v_load_expand_q(const uchar* ptr) +{ + return v_uint32x4((v4u32){ptr[0], ptr[1], ptr[2], ptr[3]}); +} + +inline v_int32x4 v_load_expand_q(const schar* ptr) +{ + return v_int32x4((v4i32){ptr[0], ptr[1], ptr[2], ptr[3]}); +} + +/* v_zip, v_combine_low, v_combine_high, v_recombine */ +#define OPENCV_HAL_IMPL_MSA_UNPACKS(_Tpvec, _Tpv, _Tpvs, ssuffix) \ +inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1) \ +{ \ + b0.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a1.val), MSA_TPV_REINTERPRET(_Tpvs, a0.val))); \ + b1.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a1.val), MSA_TPV_REINTERPRET(_Tpvs, a0.val))); \ +} \ +inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_s64(MSA_TPV_REINTERPRET(v2i64, b.val), MSA_TPV_REINTERPRET(v2i64, a.val)))); \ +} \ +inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_s64(MSA_TPV_REINTERPRET(v2i64, b.val), MSA_TPV_REINTERPRET(v2i64, a.val)))); \ +} \ +inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d) \ +{ \ + c.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_s64(MSA_TPV_REINTERPRET(v2i64, b.val), MSA_TPV_REINTERPRET(v2i64, a.val))); \ + d.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_s64(MSA_TPV_REINTERPRET(v2i64, b.val), MSA_TPV_REINTERPRET(v2i64, a.val))); \ +} + +OPENCV_HAL_IMPL_MSA_UNPACKS(v_uint8x16, v16u8, v16i8, s8) +OPENCV_HAL_IMPL_MSA_UNPACKS(v_int8x16, v16i8, v16i8, s8) +OPENCV_HAL_IMPL_MSA_UNPACKS(v_uint16x8, v8u16, v8i16, s16) +OPENCV_HAL_IMPL_MSA_UNPACKS(v_int16x8, v8i16, v8i16, s16) +OPENCV_HAL_IMPL_MSA_UNPACKS(v_uint32x4, v4u32, v4i32, s32) +OPENCV_HAL_IMPL_MSA_UNPACKS(v_int32x4, v4i32, v4i32, s32) +OPENCV_HAL_IMPL_MSA_UNPACKS(v_float32x4, v4f32, v4i32, s32) +OPENCV_HAL_IMPL_MSA_UNPACKS(v_float64x2, v2f64, v2i64, s64) + +/* v_extract */ +#define OPENCV_HAL_IMPL_MSA_EXTRACT(_Tpvec, _Tpv, _Tpvs, suffix) \ +template \ +inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_extq_##suffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), MSA_TPV_REINTERPRET(_Tpvs, b.val), s))); \ +} + +OPENCV_HAL_IMPL_MSA_EXTRACT(v_uint8x16, v16u8, v16i8, s8) +OPENCV_HAL_IMPL_MSA_EXTRACT(v_int8x16, v16i8, v16i8, s8) +OPENCV_HAL_IMPL_MSA_EXTRACT(v_uint16x8, v8u16, v8i16, s16) +OPENCV_HAL_IMPL_MSA_EXTRACT(v_int16x8, v8i16, v8i16, s16) +OPENCV_HAL_IMPL_MSA_EXTRACT(v_uint32x4, v4u32, v4i32, s32) +OPENCV_HAL_IMPL_MSA_EXTRACT(v_int32x4, v4i32, v4i32, s32) +OPENCV_HAL_IMPL_MSA_EXTRACT(v_uint64x2, v2u64, v2i64, s64) +OPENCV_HAL_IMPL_MSA_EXTRACT(v_int64x2, v2i64, v2i64, s64) +OPENCV_HAL_IMPL_MSA_EXTRACT(v_float32x4, v4f32, v4i32, s32) +OPENCV_HAL_IMPL_MSA_EXTRACT(v_float64x2, v2f64, v2i64, s64) + +/* v_round, v_floor, v_ceil, v_trunc */ +inline v_int32x4 v_round(const v_float32x4& a) +{ + return v_int32x4(msa_cvttintq_s32_f32(a.val)); +} + +inline v_int32x4 v_floor(const v_float32x4& a) +{ + v4i32 a1 = msa_cvttintq_s32_f32(a.val); + return v_int32x4(msa_addq_s32(a1, MSA_TPV_REINTERPRET(v4i32, msa_cgtq_f32(msa_cvtfintq_f32_s32(a1), a.val)))); +} + +inline v_int32x4 v_ceil(const v_float32x4& a) +{ + v4i32 a1 = msa_cvttintq_s32_f32(a.val); + return v_int32x4(msa_subq_s32(a1, MSA_TPV_REINTERPRET(v4i32, msa_cgtq_f32(a.val, msa_cvtfintq_f32_s32(a1))))); +} + +inline v_int32x4 v_trunc(const v_float32x4& a) +{ + return v_int32x4(msa_cvttruncq_s32_f32(a.val)); +} + +inline v_int32x4 v_round(const v_float64x2& a) +{ + return v_int32x4(msa_pack_s64(msa_cvttintq_s64_f64(a.val), msa_dupq_n_s64(0))); +} + +inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) +{ + return v_int32x4(msa_pack_s64(msa_cvttintq_s64_f64(a.val), msa_cvttintq_s64_f64(b.val))); +} + +inline v_int32x4 v_floor(const v_float64x2& a) +{ + v2f64 a1 = msa_cvtrintq_f64(a.val); + return v_int32x4(msa_pack_s64(msa_addq_s64(msa_cvttruncq_s64_f64(a1), MSA_TPV_REINTERPRET(v2i64, msa_cgtq_f64(a1, a.val))), msa_dupq_n_s64(0))); +} + +inline v_int32x4 v_ceil(const v_float64x2& a) +{ + v2f64 a1 = msa_cvtrintq_f64(a.val); + return v_int32x4(msa_pack_s64(msa_subq_s64(msa_cvttruncq_s64_f64(a1), MSA_TPV_REINTERPRET(v2i64, msa_cgtq_f64(a.val, a1))), msa_dupq_n_s64(0))); +} + +inline v_int32x4 v_trunc(const v_float64x2& a) +{ + return v_int32x4(msa_pack_s64(msa_cvttruncq_s64_f64(a.val), msa_dupq_n_s64(0))); +} + +#define OPENCV_HAL_IMPL_MSA_TRANSPOSE4x4(_Tpvec, _Tpv, _Tpvs, ssuffix) \ +inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, \ + const _Tpvec& a2, const _Tpvec& a3, \ + _Tpvec& b0, _Tpvec& b1, \ + _Tpvec& b2, _Tpvec& b3) \ +{ \ + _Tpv t00 = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a1.val), MSA_TPV_REINTERPRET(_Tpvs, a0.val))); \ + _Tpv t01 = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a1.val), MSA_TPV_REINTERPRET(_Tpvs, a0.val))); \ + _Tpv t10 = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a3.val), MSA_TPV_REINTERPRET(_Tpvs, a2.val))); \ + _Tpv t11 = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a3.val), MSA_TPV_REINTERPRET(_Tpvs, a2.val))); \ + b0.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_s64(MSA_TPV_REINTERPRET(v2i64, t10), MSA_TPV_REINTERPRET(v2i64, t00))); \ + b1.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_s64(MSA_TPV_REINTERPRET(v2i64, t10), MSA_TPV_REINTERPRET(v2i64, t00))); \ + b2.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_s64(MSA_TPV_REINTERPRET(v2i64, t11), MSA_TPV_REINTERPRET(v2i64, t01))); \ + b3.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_s64(MSA_TPV_REINTERPRET(v2i64, t11), MSA_TPV_REINTERPRET(v2i64, t01))); \ +} + +OPENCV_HAL_IMPL_MSA_TRANSPOSE4x4(v_uint32x4, v4u32, v4i32, s32) +OPENCV_HAL_IMPL_MSA_TRANSPOSE4x4(v_int32x4, v4i32, v4i32, s32) +OPENCV_HAL_IMPL_MSA_TRANSPOSE4x4(v_float32x4, v4f32, v4i32, s32) + +#define OPENCV_HAL_IMPL_MSA_INTERLEAVED(_Tpvec, _Tp, suffix) \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b) \ +{ \ + msa_ld2q_##suffix(ptr, &a.val, &b.val); \ +} \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, v_##_Tpvec& c) \ +{ \ + msa_ld3q_##suffix(ptr, &a.val, &b.val, &c.val); \ +} \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, \ + v_##_Tpvec& c, v_##_Tpvec& d) \ +{ \ + msa_ld4q_##suffix(ptr, &a.val, &b.val, &c.val, &d.val); \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + msa_st2q_##suffix(ptr, a.val, b.val); \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + const v_##_Tpvec& c, hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + msa_st3q_##suffix(ptr, a.val, b.val, c.val); \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + const v_##_Tpvec& c, const v_##_Tpvec& d, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED ) \ +{ \ + msa_st4q_##suffix(ptr, a.val, b.val, c.val, d.val); \ +} + +OPENCV_HAL_IMPL_MSA_INTERLEAVED(uint8x16, uchar, u8) +OPENCV_HAL_IMPL_MSA_INTERLEAVED(int8x16, schar, s8) +OPENCV_HAL_IMPL_MSA_INTERLEAVED(uint16x8, ushort, u16) +OPENCV_HAL_IMPL_MSA_INTERLEAVED(int16x8, short, s16) +OPENCV_HAL_IMPL_MSA_INTERLEAVED(uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_MSA_INTERLEAVED(int32x4, int, s32) +OPENCV_HAL_IMPL_MSA_INTERLEAVED(float32x4, float, f32) +OPENCV_HAL_IMPL_MSA_INTERLEAVED(uint64x2, uint64, u64) +OPENCV_HAL_IMPL_MSA_INTERLEAVED(int64x2, int64, s64) +OPENCV_HAL_IMPL_MSA_INTERLEAVED(float64x2, double, f64) + +/* v_cvt_f32, v_cvt_f64, v_cvt_f64_high */ +inline v_float32x4 v_cvt_f32(const v_int32x4& a) +{ + return v_float32x4(msa_cvtfintq_f32_s32(a.val)); +} + +inline v_float32x4 v_cvt_f32(const v_float64x2& a) +{ + return v_float32x4(msa_cvtfq_f32_f64(a.val, msa_dupq_n_f64(0.0f))); +} + +inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) +{ + return v_float32x4(msa_cvtfq_f32_f64(a.val, b.val)); +} + +inline v_float64x2 v_cvt_f64(const v_int32x4& a) +{ + return v_float64x2(msa_cvtflq_f64_f32(msa_cvtfintq_f32_s32(a.val))); +} + +inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) +{ + return v_float64x2(msa_cvtfhq_f64_f32(msa_cvtfintq_f32_s32(a.val))); +} + +inline v_float64x2 v_cvt_f64(const v_float32x4& a) +{ + return v_float64x2(msa_cvtflq_f64_f32(a.val)); +} + +inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) +{ + return v_float64x2(msa_cvtfhq_f64_f32(a.val)); +} + +inline v_float64x2 v_cvt_f64(const v_int64x2& a) +{ + return v_float64x2(msa_cvtfintq_f64_s64(a.val)); +} + +////////////// Lookup table access //////////////////// +inline v_int8x16 v_lut(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[ 0]], + tab[idx[ 1]], + tab[idx[ 2]], + tab[idx[ 3]], + tab[idx[ 4]], + tab[idx[ 5]], + tab[idx[ 6]], + tab[idx[ 7]], + tab[idx[ 8]], + tab[idx[ 9]], + tab[idx[10]], + tab[idx[11]], + tab[idx[12]], + tab[idx[13]], + tab[idx[14]], + tab[idx[15]] + }; + return v_int8x16(msa_ld1q_s8(elems)); +} +inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[3]], + tab[idx[3] + 1], + tab[idx[4]], + tab[idx[4] + 1], + tab[idx[5]], + tab[idx[5] + 1], + tab[idx[6]], + tab[idx[6] + 1], + tab[idx[7]], + tab[idx[7] + 1] + }; + return v_int8x16(msa_ld1q_s8(elems)); +} +inline v_int8x16 v_lut_quads(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[0] + 2], + tab[idx[0] + 3], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[1] + 2], + tab[idx[1] + 3], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[2] + 2], + tab[idx[2] + 3], + tab[idx[3]], + tab[idx[3] + 1], + tab[idx[3] + 2], + tab[idx[3] + 3] + }; + return v_int8x16(msa_ld1q_s8(elems)); +} +inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((schar*)tab, idx)); } +inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((schar*)tab, idx)); } +inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((schar*)tab, idx)); } + + +inline v_int16x8 v_lut(const short* tab, const int* idx) +{ + short CV_DECL_ALIGNED(32) elems[8] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]], + tab[idx[4]], + tab[idx[5]], + tab[idx[6]], + tab[idx[7]] + }; + return v_int16x8(msa_ld1q_s16(elems)); +} +inline v_int16x8 v_lut_pairs(const short* tab, const int* idx) +{ + short CV_DECL_ALIGNED(32) elems[8] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[3]], + tab[idx[3] + 1] + }; + return v_int16x8(msa_ld1q_s16(elems)); +} +inline v_int16x8 v_lut_quads(const short* tab, const int* idx) +{ + return v_int16x8(msa_combine_s16(msa_ld1_s16(tab + idx[0]), msa_ld1_s16(tab + idx[1]))); +} +inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((short*)tab, idx)); } +inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((short*)tab, idx)); } +inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((short*)tab, idx)); } + +inline v_int32x4 v_lut(const int* tab, const int* idx) +{ + int CV_DECL_ALIGNED(32) elems[4] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]] + }; + return v_int32x4(msa_ld1q_s32(elems)); +} +inline v_int32x4 v_lut_pairs(const int* tab, const int* idx) +{ + return v_int32x4(msa_combine_s32(msa_ld1_s32(tab + idx[0]), msa_ld1_s32(tab + idx[1]))); +} +inline v_int32x4 v_lut_quads(const int* tab, const int* idx) +{ + return v_int32x4(msa_ld1q_s32(tab + idx[0])); +} +inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((int*)tab, idx)); } +inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((int*)tab, idx)); } +inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((int*)tab, idx)); } + +inline v_int64x2 v_lut(const int64_t* tab, const int* idx) +{ + return v_int64x2(msa_combine_s64(msa_create_s64(tab[idx[0]]), msa_create_s64(tab[idx[1]]))); +} +inline v_int64x2 v_lut_pairs(const int64_t* tab, const int* idx) +{ + return v_int64x2(msa_ld1q_s64(tab + idx[0])); +} +inline v_uint64x2 v_lut(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); } +inline v_uint64x2 v_lut_pairs(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); } + +inline v_float32x4 v_lut(const float* tab, const int* idx) +{ + float CV_DECL_ALIGNED(32) elems[4] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]] + }; + return v_float32x4(msa_ld1q_f32(elems)); +} +inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) +{ + uint64 CV_DECL_ALIGNED(32) elems[2] = + { + *(uint64*)(tab + idx[0]), + *(uint64*)(tab + idx[1]) + }; + return v_float32x4(MSA_TPV_REINTERPRET(v4f32, msa_ld1q_u64(elems))); +} +inline v_float32x4 v_lut_quads(const float* tab, const int* idx) +{ + return v_float32x4(msa_ld1q_f32(tab + idx[0])); +} + +inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + + return v_int32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); +} + +inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) +{ + unsigned CV_DECL_ALIGNED(32) elems[4] = + { + tab[msa_getq_lane_s32(idxvec.val, 0)], + tab[msa_getq_lane_s32(idxvec.val, 1)], + tab[msa_getq_lane_s32(idxvec.val, 2)], + tab[msa_getq_lane_s32(idxvec.val, 3)] + }; + return v_uint32x4(msa_ld1q_u32(elems)); +} + +inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + + return v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); +} + +inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + + v4f32 xy02 = msa_combine_f32(msa_ld1_f32(tab + idx[0]), msa_ld1_f32(tab + idx[2])); + v4f32 xy13 = msa_combine_f32(msa_ld1_f32(tab + idx[1]), msa_ld1_f32(tab + idx[3])); + x = v_float32x4(MSA_TPV_REINTERPRET(v4f32, msa_ilvevq_s32(MSA_TPV_REINTERPRET(v4i32, xy13), MSA_TPV_REINTERPRET(v4i32, xy02)))); + y = v_float32x4(MSA_TPV_REINTERPRET(v4f32, msa_ilvodq_s32(MSA_TPV_REINTERPRET(v4i32, xy13), MSA_TPV_REINTERPRET(v4i32, xy02)))); +} + +inline v_int8x16 v_interleave_pairs(const v_int8x16& vec) +{ + v_int8x16 c = v_int8x16(__builtin_msa_vshf_b((v16i8)((v2i64){0x0705060403010200, 0x0F0D0E0C0B090A08}), msa_dupq_n_s8(0), vec.val)); + return c; +} +inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec) +{ return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); } +inline v_int8x16 v_interleave_quads(const v_int8x16& vec) +{ + v_int8x16 c = v_int8x16(__builtin_msa_vshf_b((v16i8)((v2i64){0x0703060205010400, 0x0F0B0E0A0D090C08}), msa_dupq_n_s8(0), vec.val)); + return c; +} +inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_interleave_pairs(const v_int16x8& vec) +{ + v_int16x8 c = v_int16x8(__builtin_msa_vshf_h((v8i16)((v2i64){0x0003000100020000, 0x0007000500060004}), msa_dupq_n_s16(0), vec.val)); + return c; +} + +inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); } + +inline v_int16x8 v_interleave_quads(const v_int16x8& vec) +{ + v_int16x8 c = v_int16x8(__builtin_msa_vshf_h((v8i16)((v2i64){0x0005000100040000, 0x0007000300060002}), msa_dupq_n_s16(0), vec.val)); + return c; +} + +inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); } + +inline v_int32x4 v_interleave_pairs(const v_int32x4& vec) +{ + v_int32x4 c; + c.val[0] = vec.val[0]; + c.val[1] = vec.val[2]; + c.val[2] = vec.val[1]; + c.val[3] = vec.val[3]; + return c; +} + +inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } +inline v_float32x4 v_interleave_pairs(const v_float32x4& vec) { return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } + +inline v_int8x16 v_pack_triplets(const v_int8x16& vec) +{ + v_int8x16 c = v_int8x16(__builtin_msa_vshf_b((v16i8)((v2i64){0x0908060504020100, 0x131211100E0D0C0A}), msa_dupq_n_s8(0), vec.val)); + return c; +} + +inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_pack_triplets(const v_int16x8& vec) +{ + v_int16x8 c = v_int16x8(__builtin_msa_vshf_h((v8i16)((v2i64){0x0004000200010000, 0x0009000800060005}), msa_dupq_n_s16(0), vec.val)); + return c; +} + +inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); } +inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; } +inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; } +inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; } + +inline v_float64x2 v_lut(const double* tab, const int* idx) +{ + double CV_DECL_ALIGNED(32) elems[2] = + { + tab[idx[0]], + tab[idx[1]] + }; + return v_float64x2(msa_ld1q_f64(elems)); +} + +inline v_float64x2 v_lut_pairs(const double* tab, const int* idx) +{ + return v_float64x2(msa_ld1q_f64(tab + idx[0])); +} + +inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + + return v_float64x2(tab[idx[0]], tab[idx[1]]); +} + +inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + + v2f64 xy0 = msa_ld1q_f64(tab + idx[0]); + v2f64 xy1 = msa_ld1q_f64(tab + idx[1]); + x = v_float64x2(MSA_TPV_REINTERPRET(v2f64, msa_ilvevq_s64(MSA_TPV_REINTERPRET(v2i64, xy1), MSA_TPV_REINTERPRET(v2i64, xy0)))); + y = v_float64x2(MSA_TPV_REINTERPRET(v2f64, msa_ilvodq_s64(MSA_TPV_REINTERPRET(v2i64, xy1), MSA_TPV_REINTERPRET(v2i64, xy0)))); +} + +template +inline typename _Tp::lane_type v_extract_n(const _Tp& a) +{ + return v_rotate_right(a).get0(); +} + +template +inline v_uint32x4 v_broadcast_element(const v_uint32x4& a) +{ + return v_setall_u32(v_extract_n(a)); +} +template +inline v_int32x4 v_broadcast_element(const v_int32x4& a) +{ + return v_setall_s32(v_extract_n(a)); +} +template +inline v_float32x4 v_broadcast_element(const v_float32x4& a) +{ + return v_setall_f32(v_extract_n(a)); +} + +////// FP16 support /////// +#if CV_FP16 +inline v_float32x4 v_load_expand(const float16_t* ptr) +{ +#ifndef msa_ld1_f16 + v4f16 v = (v4f16)msa_ld1_s16((const short*)ptr); +#else + v4f16 v = msa_ld1_f16((const __fp16*)ptr); +#endif + return v_float32x4(msa_cvt_f32_f16(v)); +} + +inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +{ + v4f16 hv = msa_cvt_f16_f32(v.val); + +#ifndef msa_st1_f16 + msa_st1_s16((short*)ptr, (int16x4_t)hv); +#else + msa_st1_f16((__fp16*)ptr, hv); +#endif +} +#else +inline v_float32x4 v_load_expand(const float16_t* ptr) +{ + float buf[4]; + for( int i = 0; i < 4; i++ ) + buf[i] = (float)ptr[i]; + return v_load(buf); +} + +inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +{ + float buf[4]; + v_store(buf, v); + for( int i = 0; i < 4; i++ ) + ptr[i] = (float16_t)buf[i]; +} +#endif + +inline void v_cleanup() {} + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END + +//! @endcond + +} + +#endif diff --git a/IPL/include/opencv/opencv2/core/hal/intrin_neon.hpp b/IPL/include/opencv/opencv2/core/hal/intrin_neon.hpp index f3e47ca..280691b 100644 --- a/IPL/include/opencv/opencv2/core/hal/intrin_neon.hpp +++ b/IPL/include/opencv/opencv2/core/hal/intrin_neon.hpp @@ -42,17 +42,82 @@ // //M*/ -#ifndef __OPENCV_HAL_INTRIN_NEON_HPP__ -#define __OPENCV_HAL_INTRIN_NEON_HPP__ +#ifndef OPENCV_HAL_INTRIN_NEON_HPP +#define OPENCV_HAL_INTRIN_NEON_HPP #include +#include "opencv2/core/utility.hpp" namespace cv { //! @cond IGNORED +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + #define CV_SIMD128 1 +#if defined(__aarch64__) || defined(_M_ARM64) +#define CV_SIMD128_64F 1 +#else +#define CV_SIMD128_64F 0 +#endif + +// TODO +#define CV_NEON_DOT 0 + +//////////// Utils //////////// + +#if CV_SIMD128_64F +#define OPENCV_HAL_IMPL_NEON_UNZIP(_Tpv, _Tpvx2, suffix) \ + inline void _v128_unzip(const _Tpv& a, const _Tpv& b, _Tpv& c, _Tpv& d) \ + { c = vuzp1q_##suffix(a, b); d = vuzp2q_##suffix(a, b); } +#define OPENCV_HAL_IMPL_NEON_UNZIP_L(_Tpv, _Tpvx2, suffix) \ + inline void _v128_unzip(const _Tpv&a, const _Tpv&b, _Tpv& c, _Tpv& d) \ + { c = vuzp1_##suffix(a, b); d = vuzp2_##suffix(a, b); } +#else +#define OPENCV_HAL_IMPL_NEON_UNZIP(_Tpv, _Tpvx2, suffix) \ + inline void _v128_unzip(const _Tpv& a, const _Tpv& b, _Tpv& c, _Tpv& d) \ + { _Tpvx2 ab = vuzpq_##suffix(a, b); c = ab.val[0]; d = ab.val[1]; } +#define OPENCV_HAL_IMPL_NEON_UNZIP_L(_Tpv, _Tpvx2, suffix) \ + inline void _v128_unzip(const _Tpv& a, const _Tpv& b, _Tpv& c, _Tpv& d) \ + { _Tpvx2 ab = vuzp_##suffix(a, b); c = ab.val[0]; d = ab.val[1]; } +#endif + +#if CV_SIMD128_64F +#define OPENCV_HAL_IMPL_NEON_REINTERPRET(_Tpv, suffix) \ + template static inline \ + _Tpv vreinterpretq_##suffix##_f64(T a) { return (_Tpv) a; } \ + template static inline \ + float64x2_t vreinterpretq_f64_##suffix(T a) { return (float64x2_t) a; } +#else +#define OPENCV_HAL_IMPL_NEON_REINTERPRET(_Tpv, suffix) +#endif + +#define OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(_Tpv, _Tpvl, suffix) \ + OPENCV_HAL_IMPL_NEON_UNZIP(_Tpv##_t, _Tpv##x2_t, suffix) \ + OPENCV_HAL_IMPL_NEON_UNZIP_L(_Tpvl##_t, _Tpvl##x2_t, suffix) \ + OPENCV_HAL_IMPL_NEON_REINTERPRET(_Tpv##_t, suffix) + +#define OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX_I64(_Tpv, _Tpvl, suffix) \ + OPENCV_HAL_IMPL_NEON_REINTERPRET(_Tpv##_t, suffix) + +#define OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX_F64(_Tpv, _Tpvl, suffix) \ + OPENCV_HAL_IMPL_NEON_UNZIP(_Tpv##_t, _Tpv##x2_t, suffix) + +OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(uint8x16, uint8x8, u8) +OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(int8x16, int8x8, s8) +OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(uint16x8, uint16x4, u16) +OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(int16x8, int16x4, s16) +OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(uint32x4, uint32x2, u32) +OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(int32x4, int32x2, s32) +OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(float32x4, float32x2, f32) +OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX_I64(uint64x2, uint64x1, u64) +OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX_I64(int64x2, int64x1, s64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX_F64(float64x2, float64x1,f64) +#endif + +//////////// Types //////////// struct v_uint8x16 { @@ -201,7 +266,7 @@ struct v_uint64x2 v_uint64x2() {} explicit v_uint64x2(uint64x2_t v) : val(v) {} - v_uint64x2(unsigned v0, unsigned v1) + v_uint64x2(uint64 v0, uint64 v1) { uint64 v[] = {v0, v1}; val = vld1q_u64(v); @@ -220,7 +285,7 @@ struct v_int64x2 v_int64x2() {} explicit v_int64x2(int64x2_t v) : val(v) {} - v_int64x2(int v0, int v1) + v_int64x2(int64 v0, int64 v1) { int64 v[] = {v0, v1}; val = vld1q_s64(v); @@ -232,6 +297,27 @@ struct v_int64x2 int64x2_t val; }; +#if CV_SIMD128_64F +struct v_float64x2 +{ + typedef double lane_type; + enum { nlanes = 2 }; + + v_float64x2() {} + explicit v_float64x2(float64x2_t v) : val(v) {} + v_float64x2(double v0, double v1) + { + double v[] = {v0, v1}; + val = vld1q_f64(v); + } + double get0() const + { + return vgetq_lane_f64(val, 0); + } + float64x2_t val; +}; +#endif + #define OPENCV_HAL_IMPL_NEON_INIT(_Tpv, _Tp, suffix) \ inline v_##_Tpv v_setzero_##suffix() { return v_##_Tpv(vdupq_n_##suffix((_Tp)0)); } \ inline v_##_Tpv v_setall_##suffix(_Tp v) { return v_##_Tpv(vdupq_n_##suffix(v)); } \ @@ -255,41 +341,85 @@ OPENCV_HAL_IMPL_NEON_INIT(int32x4, int, s32) OPENCV_HAL_IMPL_NEON_INIT(uint64x2, uint64, u64) OPENCV_HAL_IMPL_NEON_INIT(int64x2, int64, s64) OPENCV_HAL_IMPL_NEON_INIT(float32x4, float, f32) +#if CV_SIMD128_64F +#define OPENCV_HAL_IMPL_NEON_INIT_64(_Tpv, suffix) \ +inline v_float64x2 v_reinterpret_as_f64(const v_##_Tpv& v) { return v_float64x2(vreinterpretq_f64_##suffix(v.val)); } +OPENCV_HAL_IMPL_NEON_INIT(float64x2, double, f64) +OPENCV_HAL_IMPL_NEON_INIT_64(uint8x16, u8) +OPENCV_HAL_IMPL_NEON_INIT_64(int8x16, s8) +OPENCV_HAL_IMPL_NEON_INIT_64(uint16x8, u16) +OPENCV_HAL_IMPL_NEON_INIT_64(int16x8, s16) +OPENCV_HAL_IMPL_NEON_INIT_64(uint32x4, u32) +OPENCV_HAL_IMPL_NEON_INIT_64(int32x4, s32) +OPENCV_HAL_IMPL_NEON_INIT_64(uint64x2, u64) +OPENCV_HAL_IMPL_NEON_INIT_64(int64x2, s64) +OPENCV_HAL_IMPL_NEON_INIT_64(float32x4, f32) +OPENCV_HAL_IMPL_NEON_INIT_64(float64x2, f64) +#endif -#define OPENCV_HAL_IMPL_NEON_PACK(_Tpvec, _Tp, hreg, suffix, _Tpwvec, wsuffix, pack, op) \ +#define OPENCV_HAL_IMPL_NEON_PACK(_Tpvec, _Tp, hreg, suffix, _Tpwvec, pack, mov, rshr) \ inline _Tpvec v_##pack(const _Tpwvec& a, const _Tpwvec& b) \ { \ - hreg a1 = vqmov##op##_##wsuffix(a.val), b1 = vqmov##op##_##wsuffix(b.val); \ + hreg a1 = mov(a.val), b1 = mov(b.val); \ return _Tpvec(vcombine_##suffix(a1, b1)); \ } \ inline void v_##pack##_store(_Tp* ptr, const _Tpwvec& a) \ { \ - hreg a1 = vqmov##op##_##wsuffix(a.val); \ + hreg a1 = mov(a.val); \ vst1_##suffix(ptr, a1); \ } \ template inline \ _Tpvec v_rshr_##pack(const _Tpwvec& a, const _Tpwvec& b) \ { \ - hreg a1 = vqrshr##op##_n_##wsuffix(a.val, n); \ - hreg b1 = vqrshr##op##_n_##wsuffix(b.val, n); \ + hreg a1 = rshr(a.val, n); \ + hreg b1 = rshr(b.val, n); \ return _Tpvec(vcombine_##suffix(a1, b1)); \ } \ template inline \ void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a) \ { \ - hreg a1 = vqrshr##op##_n_##wsuffix(a.val, n); \ + hreg a1 = rshr(a.val, n); \ vst1_##suffix(ptr, a1); \ } -OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_uint16x8, u16, pack, n) -OPENCV_HAL_IMPL_NEON_PACK(v_int8x16, schar, int8x8_t, s8, v_int16x8, s16, pack, n) -OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_uint32x4, u32, pack, n) -OPENCV_HAL_IMPL_NEON_PACK(v_int16x8, short, int16x4_t, s16, v_int32x4, s32, pack, n) -OPENCV_HAL_IMPL_NEON_PACK(v_uint32x4, unsigned, uint32x2_t, u32, v_uint64x2, u64, pack, n) -OPENCV_HAL_IMPL_NEON_PACK(v_int32x4, int, int32x2_t, s32, v_int64x2, s64, pack, n) +OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_uint16x8, pack, vqmovn_u16, vqrshrn_n_u16) +OPENCV_HAL_IMPL_NEON_PACK(v_int8x16, schar, int8x8_t, s8, v_int16x8, pack, vqmovn_s16, vqrshrn_n_s16) +OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_uint32x4, pack, vqmovn_u32, vqrshrn_n_u32) +OPENCV_HAL_IMPL_NEON_PACK(v_int16x8, short, int16x4_t, s16, v_int32x4, pack, vqmovn_s32, vqrshrn_n_s32) +OPENCV_HAL_IMPL_NEON_PACK(v_uint32x4, unsigned, uint32x2_t, u32, v_uint64x2, pack, vmovn_u64, vrshrn_n_u64) +OPENCV_HAL_IMPL_NEON_PACK(v_int32x4, int, int32x2_t, s32, v_int64x2, pack, vmovn_s64, vrshrn_n_s64) + +OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_int16x8, pack_u, vqmovun_s16, vqrshrun_n_s16) +OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_int32x4, pack_u, vqmovun_s32, vqrshrun_n_s32) -OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_int16x8, s16, pack_u, un) -OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_int32x4, s32, pack_u, un) +// pack boolean +inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) +{ + uint8x16_t ab = vcombine_u8(vmovn_u16(a.val), vmovn_u16(b.val)); + return v_uint8x16(ab); +} + +inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, const v_uint32x4& d) +{ + uint16x8_t nab = vcombine_u16(vmovn_u32(a.val), vmovn_u32(b.val)); + uint16x8_t ncd = vcombine_u16(vmovn_u32(c.val), vmovn_u32(d.val)); + return v_uint8x16(vcombine_u8(vmovn_u16(nab), vmovn_u16(ncd))); +} + +inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, + const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f, + const v_uint64x2& g, const v_uint64x2& h) +{ + uint32x4_t ab = vcombine_u32(vmovn_u64(a.val), vmovn_u64(b.val)); + uint32x4_t cd = vcombine_u32(vmovn_u64(c.val), vmovn_u64(d.val)); + uint32x4_t ef = vcombine_u32(vmovn_u64(e.val), vmovn_u64(f.val)); + uint32x4_t gh = vcombine_u32(vmovn_u64(g.val), vmovn_u64(h.val)); + + uint16x8_t abcd = vcombine_u16(vmovn_u32(ab), vmovn_u32(cd)); + uint16x8_t efgh = vcombine_u16(vmovn_u32(ef), vmovn_u32(gh)); + return v_uint8x16(vcombine_u8(vmovn_u16(abcd), vmovn_u16(efgh))); +} inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, const v_float32x4& m1, const v_float32x4& m2, @@ -303,6 +433,18 @@ inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, return v_float32x4(res); } +inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& a) +{ + float32x2_t vl = vget_low_f32(v.val), vh = vget_high_f32(v.val); + float32x4_t res = vmulq_lane_f32(m0.val, vl, 0); + res = vmlaq_lane_f32(res, m1.val, vl, 1); + res = vmlaq_lane_f32(res, m2.val, vh, 0); + res = vaddq_f32(res, a.val); + return v_float32x4(res); +} + #define OPENCV_HAL_IMPL_NEON_BIN_OP(bin_op, _Tpvec, intrin) \ inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ { \ @@ -320,10 +462,8 @@ OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int8x16, vqaddq_s8) OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int8x16, vqsubq_s8) OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint16x8, vqaddq_u16) OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint16x8, vqsubq_u16) -OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_uint16x8, vmulq_u16) OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int16x8, vqaddq_s16) OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int16x8, vqsubq_s16) -OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_int16x8, vmulq_s16) OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int32x4, vaddq_s32) OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int32x4, vsubq_s32) OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_int32x4, vmulq_s32) @@ -337,7 +477,13 @@ OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int64x2, vaddq_s64) OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int64x2, vsubq_s64) OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint64x2, vaddq_u64) OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint64x2, vsubq_u64) - +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_BIN_OP(/, v_float32x4, vdivq_f32) +OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_float64x2, vaddq_f64) +OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_float64x2, vsubq_f64) +OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_float64x2, vmulq_f64) +OPENCV_HAL_IMPL_NEON_BIN_OP(/, v_float64x2, vdivq_f64) +#else inline v_float32x4 operator / (const v_float32x4& a, const v_float32x4& b) { float32x4_t reciprocal = vrecpeq_f32(b.val); @@ -353,6 +499,38 @@ inline v_float32x4& operator /= (v_float32x4& a, const v_float32x4& b) a.val = vmulq_f32(a.val, reciprocal); return a; } +#endif + +// saturating multiply 8-bit, 16-bit +#define OPENCV_HAL_IMPL_NEON_MUL_SAT(_Tpvec, _Tpwvec) \ + inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \ + { \ + _Tpwvec c, d; \ + v_mul_expand(a, b, c, d); \ + return v_pack(c, d); \ + } \ + inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \ + { a = a * b; return a; } + +OPENCV_HAL_IMPL_NEON_MUL_SAT(v_int8x16, v_int16x8) +OPENCV_HAL_IMPL_NEON_MUL_SAT(v_uint8x16, v_uint16x8) +OPENCV_HAL_IMPL_NEON_MUL_SAT(v_int16x8, v_int32x4) +OPENCV_HAL_IMPL_NEON_MUL_SAT(v_uint16x8, v_uint32x4) + +// Multiply and expand +inline void v_mul_expand(const v_int8x16& a, const v_int8x16& b, + v_int16x8& c, v_int16x8& d) +{ + c.val = vmull_s8(vget_low_s8(a.val), vget_low_s8(b.val)); + d.val = vmull_s8(vget_high_s8(a.val), vget_high_s8(b.val)); +} + +inline void v_mul_expand(const v_uint8x16& a, const v_uint8x16& b, + v_uint16x8& c, v_uint16x8& d) +{ + c.val = vmull_u8(vget_low_u8(a.val), vget_low_u8(b.val)); + d.val = vmull_u8(vget_high_u8(a.val), vget_high_u8(b.val)); +} inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b, v_int32x4& c, v_int32x4& d) @@ -375,13 +553,286 @@ inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b, d.val = vmull_u32(vget_high_u32(a.val), vget_high_u32(b.val)); } +inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) +{ + return v_int16x8(vcombine_s16( + vshrn_n_s32(vmull_s16( vget_low_s16(a.val), vget_low_s16(b.val)), 16), + vshrn_n_s32(vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val)), 16) + )); +} +inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) +{ + return v_uint16x8(vcombine_u16( + vshrn_n_u32(vmull_u16( vget_low_u16(a.val), vget_low_u16(b.val)), 16), + vshrn_n_u32(vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val)), 16) + )); +} + +//////// Dot Product //////// + +// 16 >> 32 inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b) { - int32x4_t c = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val)); - int32x4_t d = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val)); - int32x4x2_t cd = vuzpq_s32(c, d); - return v_int32x4(vaddq_s32(cd.val[0], cd.val[1])); + int16x8_t uzp1, uzp2; + _v128_unzip(a.val, b.val, uzp1, uzp2); + int16x4_t a0 = vget_low_s16(uzp1); + int16x4_t b0 = vget_high_s16(uzp1); + int16x4_t a1 = vget_low_s16(uzp2); + int16x4_t b1 = vget_high_s16(uzp2); + int32x4_t p = vmull_s16(a0, b0); + return v_int32x4(vmlal_s16(p, a1, b1)); +} +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ + int16x8_t uzp1, uzp2; + _v128_unzip(a.val, b.val, uzp1, uzp2); + int16x4_t a0 = vget_low_s16(uzp1); + int16x4_t b0 = vget_high_s16(uzp1); + int16x4_t a1 = vget_low_s16(uzp2); + int16x4_t b1 = vget_high_s16(uzp2); + int32x4_t p = vmlal_s16(c.val, a0, b0); + return v_int32x4(vmlal_s16(p, a1, b1)); +} + +// 32 >> 64 +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b) +{ + int32x4_t uzp1, uzp2; + _v128_unzip(a.val, b.val, uzp1, uzp2); + int32x2_t a0 = vget_low_s32(uzp1); + int32x2_t b0 = vget_high_s32(uzp1); + int32x2_t a1 = vget_low_s32(uzp2); + int32x2_t b1 = vget_high_s32(uzp2); + int64x2_t p = vmull_s32(a0, b0); + return v_int64x2(vmlal_s32(p, a1, b1)); +} +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ + int32x4_t uzp1, uzp2; + _v128_unzip(a.val, b.val, uzp1, uzp2); + int32x2_t a0 = vget_low_s32(uzp1); + int32x2_t b0 = vget_high_s32(uzp1); + int32x2_t a1 = vget_low_s32(uzp2); + int32x2_t b1 = vget_high_s32(uzp2); + int64x2_t p = vmlal_s32(c.val, a0, b0); + return v_int64x2(vmlal_s32(p, a1, b1)); +} + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b) +{ +#if CV_NEON_DOT + return v_uint32x4(vdotq_u32(vdupq_n_u32(0), a.val, b.val)); +#else + const uint8x16_t zero = vreinterpretq_u8_u32(vdupq_n_u32(0)); + const uint8x16_t mask = vreinterpretq_u8_u32(vdupq_n_u32(0x00FF00FF)); + const uint16x8_t zero32 = vreinterpretq_u16_u32(vdupq_n_u32(0)); + const uint16x8_t mask32 = vreinterpretq_u16_u32(vdupq_n_u32(0x0000FFFF)); + + uint16x8_t even = vmulq_u16(vreinterpretq_u16_u8(vbslq_u8(mask, a.val, zero)), + vreinterpretq_u16_u8(vbslq_u8(mask, b.val, zero))); + uint16x8_t odd = vmulq_u16(vshrq_n_u16(vreinterpretq_u16_u8(a.val), 8), + vshrq_n_u16(vreinterpretq_u16_u8(b.val), 8)); + + uint32x4_t s0 = vaddq_u32(vreinterpretq_u32_u16(vbslq_u16(mask32, even, zero32)), + vreinterpretq_u32_u16(vbslq_u16(mask32, odd, zero32))); + uint32x4_t s1 = vaddq_u32(vshrq_n_u32(vreinterpretq_u32_u16(even), 16), + vshrq_n_u32(vreinterpretq_u32_u16(odd), 16)); + return v_uint32x4(vaddq_u32(s0, s1)); +#endif +} +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, + const v_uint32x4& c) +{ +#if CV_NEON_DOT + return v_uint32x4(vdotq_u32(c.val, a.val, b.val)); +#else + return v_dotprod_expand(a, b) + c; +#endif +} + +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b) +{ +#if CV_NEON_DOT + return v_int32x4(vdotq_s32(vdupq_n_s32(0), a.val, b.val)); +#else + int16x8_t p0 = vmull_s8(vget_low_s8(a.val), vget_low_s8(b.val)); + int16x8_t p1 = vmull_s8(vget_high_s8(a.val), vget_high_s8(b.val)); + int16x8_t uzp1, uzp2; + _v128_unzip(p0, p1, uzp1, uzp2); + int16x8_t sum = vaddq_s16(uzp1, uzp2); + int16x4_t uzpl1, uzpl2; + _v128_unzip(vget_low_s16(sum), vget_high_s16(sum), uzpl1, uzpl2); + return v_int32x4(vaddl_s16(uzpl1, uzpl2)); +#endif +} +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, + const v_int32x4& c) +{ +#if CV_NEON_DOT + return v_int32x4(vdotq_s32(c.val, a.val, b.val)); +#else + return v_dotprod_expand(a, b) + c; +#endif +} + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b) +{ + const uint16x8_t zero = vreinterpretq_u16_u32(vdupq_n_u32(0)); + const uint16x8_t mask = vreinterpretq_u16_u32(vdupq_n_u32(0x0000FFFF)); + + uint32x4_t even = vmulq_u32(vreinterpretq_u32_u16(vbslq_u16(mask, a.val, zero)), + vreinterpretq_u32_u16(vbslq_u16(mask, b.val, zero))); + uint32x4_t odd = vmulq_u32(vshrq_n_u32(vreinterpretq_u32_u16(a.val), 16), + vshrq_n_u32(vreinterpretq_u32_u16(b.val), 16)); + uint32x4_t uzp1, uzp2; + _v128_unzip(even, odd, uzp1, uzp2); + uint64x2_t s0 = vaddl_u32(vget_low_u32(uzp1), vget_high_u32(uzp1)); + uint64x2_t s1 = vaddl_u32(vget_low_u32(uzp2), vget_high_u32(uzp2)); + return v_uint64x2(vaddq_u64(s0, s1)); +} +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b) +{ + int32x4_t p0 = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val)); + int32x4_t p1 = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val)); + + int32x4_t uzp1, uzp2; + _v128_unzip(p0, p1, uzp1, uzp2); + int32x4_t sum = vaddq_s32(uzp1, uzp2); + + int32x2_t uzpl1, uzpl2; + _v128_unzip(vget_low_s32(sum), vget_high_s32(sum), uzpl1, uzpl2); + return v_int64x2(vaddl_s32(uzpl1, uzpl2)); } +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, + const v_int64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +// 32 >> 64f +#if CV_SIMD128_64F +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b) +{ return v_cvt_f64(v_dotprod(a, b)); } +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, + const v_float64x2& c) +{ return v_dotprod_expand(a, b) + c; } +#endif + +//////// Fast Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b) +{ + int16x4_t a0 = vget_low_s16(a.val); + int16x4_t a1 = vget_high_s16(a.val); + int16x4_t b0 = vget_low_s16(b.val); + int16x4_t b1 = vget_high_s16(b.val); + int32x4_t p = vmull_s16(a0, b0); + return v_int32x4(vmlal_s16(p, a1, b1)); +} +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ + int16x4_t a0 = vget_low_s16(a.val); + int16x4_t a1 = vget_high_s16(a.val); + int16x4_t b0 = vget_low_s16(b.val); + int16x4_t b1 = vget_high_s16(b.val); + int32x4_t p = vmlal_s16(c.val, a0, b0); + return v_int32x4(vmlal_s16(p, a1, b1)); +} + +// 32 >> 64 +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b) +{ + int32x2_t a0 = vget_low_s32(a.val); + int32x2_t a1 = vget_high_s32(a.val); + int32x2_t b0 = vget_low_s32(b.val); + int32x2_t b1 = vget_high_s32(b.val); + int64x2_t p = vmull_s32(a0, b0); + return v_int64x2(vmlal_s32(p, a1, b1)); +} +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ + int32x2_t a0 = vget_low_s32(a.val); + int32x2_t a1 = vget_high_s32(a.val); + int32x2_t b0 = vget_low_s32(b.val); + int32x2_t b1 = vget_high_s32(b.val); + int64x2_t p = vmlal_s32(c.val, a0, b0); + return v_int64x2(vmlal_s32(p, a1, b1)); +} + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b) +{ +#if CV_NEON_DOT + return v_uint32x4(vdotq_u32(vdupq_n_u32(0), a.val, b.val)); +#else + uint16x8_t p0 = vmull_u8(vget_low_u8(a.val), vget_low_u8(b.val)); + uint16x8_t p1 = vmull_u8(vget_high_u8(a.val), vget_high_u8(b.val)); + uint32x4_t s0 = vaddl_u16(vget_low_u16(p0), vget_low_u16(p1)); + uint32x4_t s1 = vaddl_u16(vget_high_u16(p0), vget_high_u16(p1)); + return v_uint32x4(vaddq_u32(s0, s1)); +#endif +} +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ +#if CV_NEON_DOT + return v_uint32x4(vdotq_u32(c.val, a.val, b.val)); +#else + return v_dotprod_expand_fast(a, b) + c; +#endif +} + +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b) +{ +#if CV_NEON_DOT + return v_int32x4(vdotq_s32(vdupq_n_s32(0), a.val, b.val)); +#else + int16x8_t prod = vmull_s8(vget_low_s8(a.val), vget_low_s8(b.val)); + prod = vmlal_s8(prod, vget_high_s8(a.val), vget_high_s8(b.val)); + return v_int32x4(vaddl_s16(vget_low_s16(prod), vget_high_s16(prod))); +#endif +} +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) +{ +#if CV_NEON_DOT + return v_int32x4(vdotq_s32(c.val, a.val, b.val)); +#else + return v_dotprod_expand_fast(a, b) + c; +#endif +} + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b) +{ + uint32x4_t p0 = vmull_u16(vget_low_u16(a.val), vget_low_u16(b.val)); + uint32x4_t p1 = vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val)); + uint64x2_t s0 = vaddl_u32(vget_low_u32(p0), vget_high_u32(p0)); + uint64x2_t s1 = vaddl_u32(vget_low_u32(p1), vget_high_u32(p1)); + return v_uint64x2(vaddq_u64(s0, s1)); +} +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b) +{ + int32x4_t prod = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val)); + prod = vmlal_s16(prod, vget_high_s16(a.val), vget_high_s16(b.val)); + return v_int64x2(vaddl_s32(vget_low_s32(prod), vget_high_s32(prod))); +} +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +// 32 >> 64f +#if CV_SIMD128_64F +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_cvt_f64(v_dotprod_fast(a, b)); } +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand_fast(a, b) + c; } +#endif + #define OPENCV_HAL_IMPL_NEON_LOGIC_OP(_Tpvec, suffix) \ OPENCV_HAL_IMPL_NEON_BIN_OP(&, _Tpvec, vandq_##suffix) \ @@ -421,6 +872,18 @@ inline v_float32x4 operator ~ (const v_float32x4& a) return v_float32x4(vreinterpretq_f32_s32(vmvnq_s32(vreinterpretq_s32_f32(a.val)))); } +#if CV_SIMD128_64F +inline v_float32x4 v_sqrt(const v_float32x4& x) +{ + return v_float32x4(vsqrtq_f32(x.val)); +} + +inline v_float32x4 v_invsqrt(const v_float32x4& x) +{ + v_float32x4 one = v_setall_f32(1.0f); + return one / v_sqrt(x); +} +#else inline v_float32x4 v_sqrt(const v_float32x4& x) { float32x4_t x1 = vmaxq_f32(x.val, vdupq_n_f32(FLT_MIN)); @@ -437,10 +900,54 @@ inline v_float32x4 v_invsqrt(const v_float32x4& x) e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x.val, e), e), e); return v_float32x4(e); } +#endif + +#define OPENCV_HAL_IMPL_NEON_ABS(_Tpuvec, _Tpsvec, usuffix, ssuffix) \ +inline _Tpuvec v_abs(const _Tpsvec& a) { return v_reinterpret_as_##usuffix(_Tpsvec(vabsq_##ssuffix(a.val))); } + +OPENCV_HAL_IMPL_NEON_ABS(v_uint8x16, v_int8x16, u8, s8) +OPENCV_HAL_IMPL_NEON_ABS(v_uint16x8, v_int16x8, u16, s16) +OPENCV_HAL_IMPL_NEON_ABS(v_uint32x4, v_int32x4, u32, s32) inline v_float32x4 v_abs(v_float32x4 x) { return v_float32x4(vabsq_f32(x.val)); } +#if CV_SIMD128_64F +#define OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(bin_op, intrin) \ +inline v_float64x2 operator bin_op (const v_float64x2& a, const v_float64x2& b) \ +{ \ + return v_float64x2(vreinterpretq_f64_s64(intrin(vreinterpretq_s64_f64(a.val), vreinterpretq_s64_f64(b.val)))); \ +} \ +inline v_float64x2& operator bin_op##= (v_float64x2& a, const v_float64x2& b) \ +{ \ + a.val = vreinterpretq_f64_s64(intrin(vreinterpretq_s64_f64(a.val), vreinterpretq_s64_f64(b.val))); \ + return a; \ +} + +OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(&, vandq_s64) +OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(|, vorrq_s64) +OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(^, veorq_s64) + +inline v_float64x2 operator ~ (const v_float64x2& a) +{ + return v_float64x2(vreinterpretq_f64_s32(vmvnq_s32(vreinterpretq_s32_f64(a.val)))); +} + +inline v_float64x2 v_sqrt(const v_float64x2& x) +{ + return v_float64x2(vsqrtq_f64(x.val)); +} + +inline v_float64x2 v_invsqrt(const v_float64x2& x) +{ + v_float64x2 one = v_setall_f64(1.0f); + return one / v_sqrt(x); +} + +inline v_float64x2 v_abs(v_float64x2 x) +{ return v_float64x2(vabsq_f64(x.val)); } +#endif + // TODO: exp, log, sin, cos #define OPENCV_HAL_IMPL_NEON_BIN_FUNC(_Tpvec, func, intrin) \ @@ -463,8 +970,23 @@ OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int32x4, v_min, vminq_s32) OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int32x4, v_max, vmaxq_s32) OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_min, vminq_f32) OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_max, vmaxq_f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_min, vminq_f64) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_max, vmaxq_f64) +#endif - +#if CV_SIMD128_64F +inline int64x2_t vmvnq_s64(int64x2_t a) +{ + int64x2_t vx = vreinterpretq_s64_u32(vdupq_n_u32(0xFFFFFFFF)); + return veorq_s64(a, vx); +} +inline uint64x2_t vmvnq_u64(uint64x2_t a) +{ + uint64x2_t vx = vreinterpretq_u64_u32(vdupq_n_u32(0xFFFFFFFF)); + return veorq_u64(a, vx); +} +#endif #define OPENCV_HAL_IMPL_NEON_INT_CMP_OP(_Tpvec, cast, suffix, not_suffix) \ inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ { return _Tpvec(cast(vceqq_##suffix(a.val, b.val))); } \ @@ -486,6 +1008,18 @@ OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int16x8, vreinterpretq_s16_u16, s16, u16) OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint32x4, OPENCV_HAL_NOP, u32, u32) OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int32x4, vreinterpretq_s32_u32, s32, u32) OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_float32x4, vreinterpretq_f32_u32, f32, u32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint64x2, OPENCV_HAL_NOP, u64, u64) +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int64x2, vreinterpretq_s64_u64, s64, u64) +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_float64x2, vreinterpretq_f64_u64, f64, u64) +#endif + +inline v_float32x4 v_not_nan(const v_float32x4& a) +{ return v_float32x4(vreinterpretq_f32_u32(vceqq_f32(a.val, a.val))); } +#if CV_SIMD128_64F +inline v_float64x2 v_not_nan(const v_float64x2& a) +{ return v_float64x2(vreinterpretq_f64_u64(vceqq_f64(a.val, a.val))); } +#endif OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_add_wrap, vaddq_u8) OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_add_wrap, vaddq_s8) @@ -495,12 +1029,24 @@ OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_sub_wrap, vsubq_u8) OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_sub_wrap, vsubq_s8) OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_sub_wrap, vsubq_u16) OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_sub_wrap, vsubq_s16) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_mul_wrap, vmulq_u8) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_mul_wrap, vmulq_s8) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_mul_wrap, vmulq_u16) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_mul_wrap, vmulq_s16) -// TODO: absdiff for signed integers OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_absdiff, vabdq_u8) OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_absdiff, vabdq_u16) OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint32x4, v_absdiff, vabdq_u32) OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_absdiff, vabdq_f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_absdiff, vabdq_f64) +#endif + +/** Saturating absolute difference **/ +inline v_int8x16 v_absdiffs(const v_int8x16& a, const v_int8x16& b) +{ return v_int8x16(vqabsq_s8(vqsubq_s8(a.val, b.val))); } +inline v_int16x8 v_absdiffs(const v_int16x8& a, const v_int16x8& b) +{ return v_int16x8(vqabsq_s16(vqsubq_s16(a.val, b.val))); } #define OPENCV_HAL_IMPL_NEON_BIN_FUNC2(_Tpvec, _Tpvec2, cast, func, intrin) \ inline _Tpvec2 func(const _Tpvec& a, const _Tpvec& b) \ @@ -523,10 +1069,54 @@ inline v_float32x4 v_sqr_magnitude(const v_float32x4& a, const v_float32x4& b) return v_float32x4(vmlaq_f32(vmulq_f32(a.val, a.val), b.val, b.val)); } -inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) +inline v_float32x4 v_fma(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) { +#if CV_SIMD128_64F + // ARMv8, which adds support for 64-bit floating-point (so CV_SIMD128_64F is defined), + // also adds FMA support both for single- and double-precision floating-point vectors + return v_float32x4(vfmaq_f32(c.val, a.val, b.val)); +#else return v_float32x4(vmlaq_f32(c.val, a.val, b.val)); +#endif +} + +inline v_int32x4 v_fma(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + return v_int32x4(vmlaq_s32(c.val, a.val, b.val)); +} + +inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) +{ + return v_fma(a, b, c); +} + +inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + return v_fma(a, b, c); +} + +#if CV_SIMD128_64F +inline v_float64x2 v_magnitude(const v_float64x2& a, const v_float64x2& b) +{ + v_float64x2 x(vaddq_f64(vmulq_f64(a.val, a.val), vmulq_f64(b.val, b.val))); + return v_sqrt(x); +} + +inline v_float64x2 v_sqr_magnitude(const v_float64x2& a, const v_float64x2& b) +{ + return v_float64x2(vaddq_f64(vmulq_f64(a.val, a.val), vmulq_f64(b.val, b.val))); +} + +inline v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) +{ + return v_float64x2(vfmaq_f64(c.val, a.val, b.val)); +} + +inline v_float64x2 v_muladd(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) +{ + return v_fma(a, b, c); } +#endif // trade efficiency for convenience #define OPENCV_HAL_IMPL_NEON_SHIFT_OP(_Tpvec, suffix, _Tps, ssuffix) \ @@ -550,17 +1140,64 @@ OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int32x4, s32, int, s32) OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint64x2, u64, int64, s64) OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int64x2, s64, int64, s64) +#define OPENCV_HAL_IMPL_NEON_ROTATE_OP(_Tpvec, suffix) \ +template inline _Tpvec v_rotate_right(const _Tpvec& a) \ +{ return _Tpvec(vextq_##suffix(a.val, vdupq_n_##suffix(0), n)); } \ +template inline _Tpvec v_rotate_left(const _Tpvec& a) \ +{ return _Tpvec(vextq_##suffix(vdupq_n_##suffix(0), a.val, _Tpvec::nlanes - n)); } \ +template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a) \ +{ return a; } \ +template inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(vextq_##suffix(a.val, b.val, n)); } \ +template inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(vextq_##suffix(b.val, a.val, _Tpvec::nlanes - n)); } \ +template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a, const _Tpvec& b) \ +{ CV_UNUSED(b); return a; } + +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_uint8x16, u8) +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int8x16, s8) +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_uint16x8, u16) +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int16x8, s16) +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_uint32x4, u32) +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int32x4, s32) +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_float32x4, f32) +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_uint64x2, u64) +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int64x2, s64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_float64x2, f64) +#endif + +#if defined(__clang__) && defined(__aarch64__) +// avoid LD2 instruction. details: https://github.com/opencv/opencv/issues/14863 +#define OPENCV_HAL_IMPL_NEON_LOAD_LOW_OP(_Tpvec, _Tp, suffix) \ +inline _Tpvec v_load_low(const _Tp* ptr) \ +{ \ +typedef uint64 CV_DECL_ALIGNED(1) unaligned_uint64; \ +uint64 v = *(unaligned_uint64*)ptr; \ +return _Tpvec(v_reinterpret_as_##suffix(v_uint64x2(v, (uint64)123456))); \ +} +#else +#define OPENCV_HAL_IMPL_NEON_LOAD_LOW_OP(_Tpvec, _Tp, suffix) \ +inline _Tpvec v_load_low(const _Tp* ptr) \ +{ return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr), vdup_n_##suffix((_Tp)0))); } +#endif + #define OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(_Tpvec, _Tp, suffix) \ inline _Tpvec v_load(const _Tp* ptr) \ { return _Tpvec(vld1q_##suffix(ptr)); } \ inline _Tpvec v_load_aligned(const _Tp* ptr) \ { return _Tpvec(vld1q_##suffix(ptr)); } \ +OPENCV_HAL_IMPL_NEON_LOAD_LOW_OP(_Tpvec, _Tp, suffix) \ inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ { return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr0), vld1_##suffix(ptr1))); } \ inline void v_store(_Tp* ptr, const _Tpvec& a) \ { vst1q_##suffix(ptr, a.val); } \ inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ { vst1q_##suffix(ptr, a.val); } \ +inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ +{ vst1q_##suffix(ptr, a.val); } \ +inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) \ +{ vst1q_##suffix(ptr, a.val); } \ inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ { vst1_##suffix(ptr, vget_low_##suffix(a.val)); } \ inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ @@ -575,26 +1212,164 @@ OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int32x4, int, s32) OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint64x2, uint64, u64) OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int64x2, int64, s64) OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float32x4, float, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float64x2, double, f64) +#endif + +inline unsigned v_reduce_sum(const v_uint8x16& a) +{ + uint32x4_t t0 = vpaddlq_u16(vpaddlq_u8(a.val)); + uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); + return vget_lane_u32(vpadd_u32(t1, t1), 0); +} +inline int v_reduce_sum(const v_int8x16& a) +{ + int32x4_t t0 = vpaddlq_s16(vpaddlq_s8(a.val)); + int32x2_t t1 = vpadd_s32(vget_low_s32(t0), vget_high_s32(t0)); + return vget_lane_s32(vpadd_s32(t1, t1), 0); +} +inline unsigned v_reduce_sum(const v_uint16x8& a) +{ + uint32x4_t t0 = vpaddlq_u16(a.val); + uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); + return vget_lane_u32(vpadd_u32(t1, t1), 0); +} +inline int v_reduce_sum(const v_int16x8& a) +{ + int32x4_t t0 = vpaddlq_s16(a.val); + int32x2_t t1 = vpadd_s32(vget_low_s32(t0), vget_high_s32(t0)); + return vget_lane_s32(vpadd_s32(t1, t1), 0); +} -#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(_Tpvec, scalartype, func, scalar_func) \ +#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \ inline scalartype v_reduce_##func(const _Tpvec& a) \ { \ - scalartype CV_DECL_ALIGNED(16) buf[4]; \ - v_store_aligned(buf, a); \ - scalartype s0 = scalar_func(buf[0], buf[1]); \ - scalartype s1 = scalar_func(buf[2], buf[3]); \ - return scalar_func(s0, s1); \ + _Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); \ + a0 = vp##vectorfunc##_##suffix(a0, a0); \ + a0 = vp##vectorfunc##_##suffix(a0, a0); \ + return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); \ } -OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, unsigned, sum, OPENCV_HAL_ADD) -OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, unsigned, max, std::max) -OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, unsigned, min, std::min) -OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int, sum, OPENCV_HAL_ADD) -OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int, max, std::max) -OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int, min, std::min) -OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float, sum, OPENCV_HAL_ADD) -OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float, max, std::max) -OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float, min, std::min) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_uint8x16, uint8x8, uchar, max, max, u8) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_uint8x16, uint8x8, uchar, min, min, u8) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_int8x16, int8x8, schar, max, max, s8) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_int8x16, int8x8, schar, min, min, s8) + +#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + _Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); \ + a0 = vp##vectorfunc##_##suffix(a0, a0); \ + return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); \ +} + +OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, ushort, max, max, u16) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, ushort, min, min, u16) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, max, max, s16) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, min, min, s16) + +#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + _Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); \ + return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, vget_high_##suffix(a.val)),0); \ +} + +OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, sum, add, u32) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, max, max, u32) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, min, min, u32) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int32x2, int, sum, add, s32) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int32x2, int, max, max, s32) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int32x2, int, min, min, s32) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, sum, add, f32) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, max, max, f32) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, min, min, f32) + +inline uint64 v_reduce_sum(const v_uint64x2& a) +{ return vget_lane_u64(vadd_u64(vget_low_u64(a.val), vget_high_u64(a.val)),0); } +inline int64 v_reduce_sum(const v_int64x2& a) +{ return vget_lane_s64(vadd_s64(vget_low_s64(a.val), vget_high_s64(a.val)),0); } +#if CV_SIMD128_64F +inline double v_reduce_sum(const v_float64x2& a) +{ + return vgetq_lane_f64(a.val, 0) + vgetq_lane_f64(a.val, 1); +} +#endif + +inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d) +{ + float32x4x2_t ab = vtrnq_f32(a.val, b.val); + float32x4x2_t cd = vtrnq_f32(c.val, d.val); + + float32x4_t u0 = vaddq_f32(ab.val[0], ab.val[1]); // a0+a1 b0+b1 a2+a3 b2+b3 + float32x4_t u1 = vaddq_f32(cd.val[0], cd.val[1]); // c0+c1 d0+d1 c2+c3 d2+d3 + + float32x4_t v0 = vcombine_f32(vget_low_f32(u0), vget_low_f32(u1)); + float32x4_t v1 = vcombine_f32(vget_high_f32(u0), vget_high_f32(u1)); + + return v_float32x4(vaddq_f32(v0, v1)); +} + +inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b) +{ + uint32x4_t t0 = vpaddlq_u16(vpaddlq_u8(vabdq_u8(a.val, b.val))); + uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); + return vget_lane_u32(vpadd_u32(t1, t1), 0); +} +inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b) +{ + uint32x4_t t0 = vpaddlq_u16(vpaddlq_u8(vreinterpretq_u8_s8(vabdq_s8(a.val, b.val)))); + uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); + return vget_lane_u32(vpadd_u32(t1, t1), 0); +} +inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b) +{ + uint32x4_t t0 = vpaddlq_u16(vabdq_u16(a.val, b.val)); + uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); + return vget_lane_u32(vpadd_u32(t1, t1), 0); +} +inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b) +{ + uint32x4_t t0 = vpaddlq_u16(vreinterpretq_u16_s16(vabdq_s16(a.val, b.val))); + uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); + return vget_lane_u32(vpadd_u32(t1, t1), 0); +} +inline unsigned v_reduce_sad(const v_uint32x4& a, const v_uint32x4& b) +{ + uint32x4_t t0 = vabdq_u32(a.val, b.val); + uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); + return vget_lane_u32(vpadd_u32(t1, t1), 0); +} +inline unsigned v_reduce_sad(const v_int32x4& a, const v_int32x4& b) +{ + uint32x4_t t0 = vreinterpretq_u32_s32(vabdq_s32(a.val, b.val)); + uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); + return vget_lane_u32(vpadd_u32(t1, t1), 0); +} +inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b) +{ + float32x4_t t0 = vabdq_f32(a.val, b.val); + float32x2_t t1 = vpadd_f32(vget_low_f32(t0), vget_high_f32(t0)); + return vget_lane_f32(vpadd_f32(t1, t1), 0); +} + +inline v_uint8x16 v_popcount(const v_uint8x16& a) +{ return v_uint8x16(vcntq_u8(a.val)); } +inline v_uint8x16 v_popcount(const v_int8x16& a) +{ return v_uint8x16(vcntq_u8(vreinterpretq_u8_s8(a.val))); } +inline v_uint16x8 v_popcount(const v_uint16x8& a) +{ return v_uint16x8(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u16(a.val)))); } +inline v_uint16x8 v_popcount(const v_int16x8& a) +{ return v_uint16x8(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_s16(a.val)))); } +inline v_uint32x4 v_popcount(const v_uint32x4& a) +{ return v_uint32x4(vpaddlq_u16(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u32(a.val))))); } +inline v_uint32x4 v_popcount(const v_int32x4& a) +{ return v_uint32x4(vpaddlq_u16(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_s32(a.val))))); } +inline v_uint64x2 v_popcount(const v_uint64x2& a) +{ return v_uint64x2(vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(a.val)))))); } +inline v_uint64x2 v_popcount(const v_int64x2& a) +{ return v_uint64x2(vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_s64(a.val)))))); } inline int v_signmask(const v_uint8x16& a) { @@ -627,6 +1402,31 @@ inline int v_signmask(const v_int32x4& a) { return v_signmask(v_reinterpret_as_u32(a)); } inline int v_signmask(const v_float32x4& a) { return v_signmask(v_reinterpret_as_u32(a)); } +inline int v_signmask(const v_uint64x2& a) +{ + int64x1_t m0 = vdup_n_s64(0); + uint64x2_t v0 = vshlq_u64(vshrq_n_u64(a.val, 63), vcombine_s64(m0, m0)); + return (int)vgetq_lane_u64(v0, 0) + ((int)vgetq_lane_u64(v0, 1) << 1); +} +inline int v_signmask(const v_int64x2& a) +{ return v_signmask(v_reinterpret_as_u64(a)); } +#if CV_SIMD128_64F +inline int v_signmask(const v_float64x2& a) +{ return v_signmask(v_reinterpret_as_u64(a)); } +#endif + +inline int v_scan_forward(const v_int8x16& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint8x16& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_int16x8& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint16x8& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_int32x4& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint32x4& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_float32x4& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_int64x2& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint64x2& a) { return trailingZeros32(v_signmask(a)); } +#if CV_SIMD128_64F +inline int v_scan_forward(const v_float64x2& a) { return trailingZeros32(v_signmask(a)); } +#endif #define OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(_Tpvec, suffix, shift) \ inline bool v_check_all(const v_##_Tpvec& a) \ @@ -646,6 +1446,17 @@ OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint8x16, u8, 7) OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint16x8, u16, 15) OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint32x4, u32, 31) +inline bool v_check_all(const v_uint64x2& a) +{ + uint64x2_t v0 = vshrq_n_u64(a.val, 63); + return (vgetq_lane_u64(v0, 0) & vgetq_lane_u64(v0, 1)) == 1; +} +inline bool v_check_any(const v_uint64x2& a) +{ + uint64x2_t v0 = vshrq_n_u64(a.val, 63); + return (vgetq_lane_u64(v0, 0) | vgetq_lane_u64(v0, 1)) != 0; +} + inline bool v_check_all(const v_int8x16& a) { return v_check_all(v_reinterpret_as_u8(a)); } inline bool v_check_all(const v_int16x8& a) @@ -664,6 +1475,17 @@ inline bool v_check_any(const v_int32x4& a) inline bool v_check_any(const v_float32x4& a) { return v_check_any(v_reinterpret_as_u32(a)); } +inline bool v_check_all(const v_int64x2& a) +{ return v_check_all(v_reinterpret_as_u64(a)); } +inline bool v_check_any(const v_int64x2& a) +{ return v_check_any(v_reinterpret_as_u64(a)); } +#if CV_SIMD128_64F +inline bool v_check_all(const v_float64x2& a) +{ return v_check_all(v_reinterpret_as_u64(a)); } +inline bool v_check_any(const v_float64x2& a) +{ return v_check_any(v_reinterpret_as_u64(a)); } +#endif + #define OPENCV_HAL_IMPL_NEON_SELECT(_Tpvec, suffix, usuffix) \ inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \ { \ @@ -677,6 +1499,9 @@ OPENCV_HAL_IMPL_NEON_SELECT(v_int16x8, s16, u16) OPENCV_HAL_IMPL_NEON_SELECT(v_uint32x4, u32, u32) OPENCV_HAL_IMPL_NEON_SELECT(v_int32x4, s32, u32) OPENCV_HAL_IMPL_NEON_SELECT(v_float32x4, f32, u32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_SELECT(v_float64x2, f64, u64) +#endif #define OPENCV_HAL_IMPL_NEON_EXPAND(_Tpvec, _Tpwvec, _Tp, suffix) \ inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ @@ -684,6 +1509,14 @@ inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ b0.val = vmovl_##suffix(vget_low_##suffix(a.val)); \ b1.val = vmovl_##suffix(vget_high_##suffix(a.val)); \ } \ +inline _Tpwvec v_expand_low(const _Tpvec& a) \ +{ \ + return _Tpwvec(vmovl_##suffix(vget_low_##suffix(a.val))); \ +} \ +inline _Tpwvec v_expand_high(const _Tpvec& a) \ +{ \ + return _Tpwvec(vmovl_##suffix(vget_high_##suffix(a.val))); \ +} \ inline _Tpwvec v_load_expand(const _Tp* ptr) \ { \ return _Tpwvec(vmovl_##suffix(vld1_##suffix(ptr))); \ @@ -698,18 +1531,41 @@ OPENCV_HAL_IMPL_NEON_EXPAND(v_int32x4, v_int64x2, int, s32) inline v_uint32x4 v_load_expand_q(const uchar* ptr) { - uint8x8_t v0 = vcreate_u8(*(unsigned*)ptr); + typedef unsigned int CV_DECL_ALIGNED(1) unaligned_uint; + uint8x8_t v0 = vcreate_u8(*(unaligned_uint*)ptr); uint16x4_t v1 = vget_low_u16(vmovl_u8(v0)); return v_uint32x4(vmovl_u16(v1)); } inline v_int32x4 v_load_expand_q(const schar* ptr) { - int8x8_t v0 = vcreate_s8(*(unsigned*)ptr); + typedef unsigned int CV_DECL_ALIGNED(1) unaligned_uint; + int8x8_t v0 = vcreate_s8(*(unaligned_uint*)ptr); int16x4_t v1 = vget_low_s16(vmovl_s8(v0)); return v_int32x4(vmovl_s16(v1)); } +#if defined(__aarch64__) || defined(_M_ARM64) +#define OPENCV_HAL_IMPL_NEON_UNPACKS(_Tpvec, suffix) \ +inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \ +{ \ + b0.val = vzip1q_##suffix(a0.val, a1.val); \ + b1.val = vzip2q_##suffix(a0.val, a1.val); \ +} \ +inline v_##_Tpvec v_combine_low(const v_##_Tpvec& a, const v_##_Tpvec& b) \ +{ \ + return v_##_Tpvec(vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val))); \ +} \ +inline v_##_Tpvec v_combine_high(const v_##_Tpvec& a, const v_##_Tpvec& b) \ +{ \ + return v_##_Tpvec(vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val))); \ +} \ +inline void v_recombine(const v_##_Tpvec& a, const v_##_Tpvec& b, v_##_Tpvec& c, v_##_Tpvec& d) \ +{ \ + c.val = vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val)); \ + d.val = vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val)); \ +} +#else #define OPENCV_HAL_IMPL_NEON_UNPACKS(_Tpvec, suffix) \ inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \ { \ @@ -730,6 +1586,7 @@ inline void v_recombine(const v_##_Tpvec& a, const v_##_Tpvec& b, v_##_Tpvec& c, c.val = vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val)); \ d.val = vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val)); \ } +#endif OPENCV_HAL_IMPL_NEON_UNPACKS(uint8x16, u8) OPENCV_HAL_IMPL_NEON_UNPACKS(int8x16, s8) @@ -738,6 +1595,55 @@ OPENCV_HAL_IMPL_NEON_UNPACKS(int16x8, s16) OPENCV_HAL_IMPL_NEON_UNPACKS(uint32x4, u32) OPENCV_HAL_IMPL_NEON_UNPACKS(int32x4, s32) OPENCV_HAL_IMPL_NEON_UNPACKS(float32x4, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_UNPACKS(float64x2, f64) +#endif + +inline v_uint8x16 v_reverse(const v_uint8x16 &a) +{ + uint8x16_t vec = vrev64q_u8(a.val); + return v_uint8x16(vextq_u8(vec, vec, 8)); +} + +inline v_int8x16 v_reverse(const v_int8x16 &a) +{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); } + +inline v_uint16x8 v_reverse(const v_uint16x8 &a) +{ + uint16x8_t vec = vrev64q_u16(a.val); + return v_uint16x8(vextq_u16(vec, vec, 4)); +} + +inline v_int16x8 v_reverse(const v_int16x8 &a) +{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } + +inline v_uint32x4 v_reverse(const v_uint32x4 &a) +{ + uint32x4_t vec = vrev64q_u32(a.val); + return v_uint32x4(vextq_u32(vec, vec, 2)); +} + +inline v_int32x4 v_reverse(const v_int32x4 &a) +{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_float32x4 v_reverse(const v_float32x4 &a) +{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_uint64x2 v_reverse(const v_uint64x2 &a) +{ + uint64x2_t vec = a.val; + uint64x1_t vec_lo = vget_low_u64(vec); + uint64x1_t vec_hi = vget_high_u64(vec); + return v_uint64x2(vcombine_u64(vec_hi, vec_lo)); +} + +inline v_int64x2 v_reverse(const v_int64x2 &a) +{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } + +#if CV_SIMD128_64F +inline v_float64x2 v_reverse(const v_float64x2 &a) +{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } +#endif #define OPENCV_HAL_IMPL_NEON_EXTRACT(_Tpvec, suffix) \ template \ @@ -755,7 +1661,54 @@ OPENCV_HAL_IMPL_NEON_EXTRACT(int32x4, s32) OPENCV_HAL_IMPL_NEON_EXTRACT(uint64x2, u64) OPENCV_HAL_IMPL_NEON_EXTRACT(int64x2, s64) OPENCV_HAL_IMPL_NEON_EXTRACT(float32x4, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_EXTRACT(float64x2, f64) +#endif + +#define OPENCV_HAL_IMPL_NEON_EXTRACT_N(_Tpvec, _Tp, suffix) \ +template inline _Tp v_extract_n(_Tpvec v) { return vgetq_lane_##suffix(v.val, i); } + +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_uint8x16, uchar, u8) +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_int8x16, schar, s8) +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_int16x8, short, s16) +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_uint32x4, uint, u32) +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_int32x4, int, s32) +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_int64x2, int64, s64) +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_float32x4, float, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_float64x2, double, f64) +#endif +#define OPENCV_HAL_IMPL_NEON_BROADCAST(_Tpvec, _Tp, suffix) \ +template inline _Tpvec v_broadcast_element(_Tpvec v) { _Tp t = v_extract_n(v); return v_setall_##suffix(t); } + +OPENCV_HAL_IMPL_NEON_BROADCAST(v_uint8x16, uchar, u8) +OPENCV_HAL_IMPL_NEON_BROADCAST(v_int8x16, schar, s8) +OPENCV_HAL_IMPL_NEON_BROADCAST(v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_NEON_BROADCAST(v_int16x8, short, s16) +OPENCV_HAL_IMPL_NEON_BROADCAST(v_uint32x4, uint, u32) +OPENCV_HAL_IMPL_NEON_BROADCAST(v_int32x4, int, s32) +OPENCV_HAL_IMPL_NEON_BROADCAST(v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_NEON_BROADCAST(v_int64x2, int64, s64) +OPENCV_HAL_IMPL_NEON_BROADCAST(v_float32x4, float, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_BROADCAST(v_float64x2, double, f64) +#endif + +#if CV_SIMD128_64F +inline v_int32x4 v_round(const v_float32x4& a) +{ + float32x4_t a_ = a.val; + int32x4_t result; + __asm__ ("fcvtns %0.4s, %1.4s" + : "=w"(result) + : "w"(a_) + : /* No clobbers */); + return v_int32x4(result); +} +#else inline v_int32x4 v_round(const v_float32x4& a) { static const int32x4_t v_sign = vdupq_n_s32(1 << 31), @@ -764,7 +1717,7 @@ inline v_int32x4 v_round(const v_float32x4& a) int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(a.val))); return v_int32x4(vcvtq_s32_f32(vaddq_f32(a.val, vreinterpretq_f32_s32(v_addition)))); } - +#endif inline v_int32x4 v_floor(const v_float32x4& a) { int32x4_t a1 = vcvtq_s32_f32(a.val); @@ -782,6 +1735,43 @@ inline v_int32x4 v_ceil(const v_float32x4& a) inline v_int32x4 v_trunc(const v_float32x4& a) { return v_int32x4(vcvtq_s32_f32(a.val)); } +#if CV_SIMD128_64F +inline v_int32x4 v_round(const v_float64x2& a) +{ + static const int32x2_t zero = vdup_n_s32(0); + return v_int32x4(vcombine_s32(vmovn_s64(vcvtaq_s64_f64(a.val)), zero)); +} + +inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) +{ + return v_int32x4(vcombine_s32(vmovn_s64(vcvtaq_s64_f64(a.val)), vmovn_s64(vcvtaq_s64_f64(b.val)))); +} + +inline v_int32x4 v_floor(const v_float64x2& a) +{ + static const int32x2_t zero = vdup_n_s32(0); + int64x2_t a1 = vcvtq_s64_f64(a.val); + uint64x2_t mask = vcgtq_f64(vcvtq_f64_s64(a1), a.val); + a1 = vaddq_s64(a1, vreinterpretq_s64_u64(mask)); + return v_int32x4(vcombine_s32(vmovn_s64(a1), zero)); +} + +inline v_int32x4 v_ceil(const v_float64x2& a) +{ + static const int32x2_t zero = vdup_n_s32(0); + int64x2_t a1 = vcvtq_s64_f64(a.val); + uint64x2_t mask = vcgtq_f64(a.val, vcvtq_f64_s64(a1)); + a1 = vsubq_s64(a1, vreinterpretq_s64_u64(mask)); + return v_int32x4(vcombine_s32(vmovn_s64(a1), zero)); +} + +inline v_int32x4 v_trunc(const v_float64x2& a) +{ + static const int32x2_t zero = vdup_n_s32(0); + return v_int32x4(vcombine_s32(vmovn_s64(vcvtaq_s64_f64(a.val)), zero)); +} +#endif + #define OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(_Tpvec, suffix) \ inline void v_transpose4x4(const v_##_Tpvec& a0, const v_##_Tpvec& a1, \ const v_##_Tpvec& a2, const v_##_Tpvec& a3, \ @@ -809,6 +1799,12 @@ OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(int32x4, s32) OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(float32x4, f32) #define OPENCV_HAL_IMPL_NEON_INTERLEAVED(_Tpvec, _Tp, suffix) \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b) \ +{ \ + _Tpvec##x2_t v = vld2q_##suffix(ptr); \ + a.val = v.val[0]; \ + b.val = v.val[1]; \ +} \ inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, v_##_Tpvec& c) \ { \ _Tpvec##x3_t v = vld3q_##suffix(ptr); \ @@ -825,7 +1821,16 @@ inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, \ c.val = v.val[2]; \ d.val = v.val[3]; \ } \ -inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, const v_##_Tpvec& c) \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + _Tpvec##x2_t v; \ + v.val[0] = a.val; \ + v.val[1] = b.val; \ + vst2q_##suffix(ptr, v); \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + const v_##_Tpvec& c, hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ { \ _Tpvec##x3_t v; \ v.val[0] = a.val; \ @@ -834,7 +1839,8 @@ inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& vst3q_##suffix(ptr, v); \ } \ inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ - const v_##_Tpvec& c, const v_##_Tpvec& d) \ + const v_##_Tpvec& c, const v_##_Tpvec& d, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED ) \ { \ _Tpvec##x4_t v; \ v.val[0] = a.val; \ @@ -844,6 +1850,83 @@ inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& vst4q_##suffix(ptr, v); \ } +#define OPENCV_HAL_IMPL_NEON_INTERLEAVED_INT64(tp, suffix) \ +inline void v_load_deinterleave( const tp* ptr, v_##tp##x2& a, v_##tp##x2& b ) \ +{ \ + tp##x1_t a0 = vld1_##suffix(ptr); \ + tp##x1_t b0 = vld1_##suffix(ptr + 1); \ + tp##x1_t a1 = vld1_##suffix(ptr + 2); \ + tp##x1_t b1 = vld1_##suffix(ptr + 3); \ + a = v_##tp##x2(vcombine_##suffix(a0, a1)); \ + b = v_##tp##x2(vcombine_##suffix(b0, b1)); \ +} \ + \ +inline void v_load_deinterleave( const tp* ptr, v_##tp##x2& a, \ + v_##tp##x2& b, v_##tp##x2& c ) \ +{ \ + tp##x1_t a0 = vld1_##suffix(ptr); \ + tp##x1_t b0 = vld1_##suffix(ptr + 1); \ + tp##x1_t c0 = vld1_##suffix(ptr + 2); \ + tp##x1_t a1 = vld1_##suffix(ptr + 3); \ + tp##x1_t b1 = vld1_##suffix(ptr + 4); \ + tp##x1_t c1 = vld1_##suffix(ptr + 5); \ + a = v_##tp##x2(vcombine_##suffix(a0, a1)); \ + b = v_##tp##x2(vcombine_##suffix(b0, b1)); \ + c = v_##tp##x2(vcombine_##suffix(c0, c1)); \ +} \ + \ +inline void v_load_deinterleave( const tp* ptr, v_##tp##x2& a, v_##tp##x2& b, \ + v_##tp##x2& c, v_##tp##x2& d ) \ +{ \ + tp##x1_t a0 = vld1_##suffix(ptr); \ + tp##x1_t b0 = vld1_##suffix(ptr + 1); \ + tp##x1_t c0 = vld1_##suffix(ptr + 2); \ + tp##x1_t d0 = vld1_##suffix(ptr + 3); \ + tp##x1_t a1 = vld1_##suffix(ptr + 4); \ + tp##x1_t b1 = vld1_##suffix(ptr + 5); \ + tp##x1_t c1 = vld1_##suffix(ptr + 6); \ + tp##x1_t d1 = vld1_##suffix(ptr + 7); \ + a = v_##tp##x2(vcombine_##suffix(a0, a1)); \ + b = v_##tp##x2(vcombine_##suffix(b0, b1)); \ + c = v_##tp##x2(vcombine_##suffix(c0, c1)); \ + d = v_##tp##x2(vcombine_##suffix(d0, d1)); \ +} \ + \ +inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, const v_##tp##x2& b, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + vst1_##suffix(ptr, vget_low_##suffix(a.val)); \ + vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \ + vst1_##suffix(ptr + 2, vget_high_##suffix(a.val)); \ + vst1_##suffix(ptr + 3, vget_high_##suffix(b.val)); \ +} \ + \ +inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, \ + const v_##tp##x2& b, const v_##tp##x2& c, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + vst1_##suffix(ptr, vget_low_##suffix(a.val)); \ + vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \ + vst1_##suffix(ptr + 2, vget_low_##suffix(c.val)); \ + vst1_##suffix(ptr + 3, vget_high_##suffix(a.val)); \ + vst1_##suffix(ptr + 4, vget_high_##suffix(b.val)); \ + vst1_##suffix(ptr + 5, vget_high_##suffix(c.val)); \ +} \ + \ +inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, const v_##tp##x2& b, \ + const v_##tp##x2& c, const v_##tp##x2& d, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + vst1_##suffix(ptr, vget_low_##suffix(a.val)); \ + vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \ + vst1_##suffix(ptr + 2, vget_low_##suffix(c.val)); \ + vst1_##suffix(ptr + 3, vget_low_##suffix(d.val)); \ + vst1_##suffix(ptr + 4, vget_high_##suffix(a.val)); \ + vst1_##suffix(ptr + 5, vget_high_##suffix(b.val)); \ + vst1_##suffix(ptr + 6, vget_high_##suffix(c.val)); \ + vst1_##suffix(ptr + 7, vget_high_##suffix(d.val)); \ +} + OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint8x16, uchar, u8) OPENCV_HAL_IMPL_NEON_INTERLEAVED(int8x16, schar, s8) OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint16x8, ushort, u16) @@ -851,12 +1934,411 @@ OPENCV_HAL_IMPL_NEON_INTERLEAVED(int16x8, short, s16) OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint32x4, unsigned, u32) OPENCV_HAL_IMPL_NEON_INTERLEAVED(int32x4, int, s32) OPENCV_HAL_IMPL_NEON_INTERLEAVED(float32x4, float, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_INTERLEAVED(float64x2, double, f64) +#endif + +OPENCV_HAL_IMPL_NEON_INTERLEAVED_INT64(int64, s64) +OPENCV_HAL_IMPL_NEON_INTERLEAVED_INT64(uint64, u64) inline v_float32x4 v_cvt_f32(const v_int32x4& a) { return v_float32x4(vcvtq_f32_s32(a.val)); } +#if CV_SIMD128_64F +inline v_float32x4 v_cvt_f32(const v_float64x2& a) +{ + float32x2_t zero = vdup_n_f32(0.0f); + return v_float32x4(vcombine_f32(vcvt_f32_f64(a.val), zero)); +} + +inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) +{ + return v_float32x4(vcombine_f32(vcvt_f32_f64(a.val), vcvt_f32_f64(b.val))); +} + +inline v_float64x2 v_cvt_f64(const v_int32x4& a) +{ + return v_float64x2(vcvt_f64_f32(vcvt_f32_s32(vget_low_s32(a.val)))); +} + +inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) +{ + return v_float64x2(vcvt_f64_f32(vcvt_f32_s32(vget_high_s32(a.val)))); +} + +inline v_float64x2 v_cvt_f64(const v_float32x4& a) +{ + return v_float64x2(vcvt_f64_f32(vget_low_f32(a.val))); +} + +inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) +{ + return v_float64x2(vcvt_f64_f32(vget_high_f32(a.val))); +} + +inline v_float64x2 v_cvt_f64(const v_int64x2& a) +{ return v_float64x2(vcvtq_f64_s64(a.val)); } + +#endif + +////////////// Lookup table access //////////////////// + +inline v_int8x16 v_lut(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[ 0]], + tab[idx[ 1]], + tab[idx[ 2]], + tab[idx[ 3]], + tab[idx[ 4]], + tab[idx[ 5]], + tab[idx[ 6]], + tab[idx[ 7]], + tab[idx[ 8]], + tab[idx[ 9]], + tab[idx[10]], + tab[idx[11]], + tab[idx[12]], + tab[idx[13]], + tab[idx[14]], + tab[idx[15]] + }; + return v_int8x16(vld1q_s8(elems)); +} +inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[3]], + tab[idx[3] + 1], + tab[idx[4]], + tab[idx[4] + 1], + tab[idx[5]], + tab[idx[5] + 1], + tab[idx[6]], + tab[idx[6] + 1], + tab[idx[7]], + tab[idx[7] + 1] + }; + return v_int8x16(vld1q_s8(elems)); +} +inline v_int8x16 v_lut_quads(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[0] + 2], + tab[idx[0] + 3], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[1] + 2], + tab[idx[1] + 3], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[2] + 2], + tab[idx[2] + 3], + tab[idx[3]], + tab[idx[3] + 1], + tab[idx[3] + 2], + tab[idx[3] + 3] + }; + return v_int8x16(vld1q_s8(elems)); +} +inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((schar*)tab, idx)); } +inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((schar*)tab, idx)); } +inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((schar*)tab, idx)); } + +inline v_int16x8 v_lut(const short* tab, const int* idx) +{ + short CV_DECL_ALIGNED(32) elems[8] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]], + tab[idx[4]], + tab[idx[5]], + tab[idx[6]], + tab[idx[7]] + }; + return v_int16x8(vld1q_s16(elems)); +} +inline v_int16x8 v_lut_pairs(const short* tab, const int* idx) +{ + short CV_DECL_ALIGNED(32) elems[8] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[3]], + tab[idx[3] + 1] + }; + return v_int16x8(vld1q_s16(elems)); +} +inline v_int16x8 v_lut_quads(const short* tab, const int* idx) +{ + return v_int16x8(vcombine_s16(vld1_s16(tab + idx[0]), vld1_s16(tab + idx[1]))); +} +inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((short*)tab, idx)); } +inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((short*)tab, idx)); } +inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((short*)tab, idx)); } + +inline v_int32x4 v_lut(const int* tab, const int* idx) +{ + int CV_DECL_ALIGNED(32) elems[4] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]] + }; + return v_int32x4(vld1q_s32(elems)); +} +inline v_int32x4 v_lut_pairs(const int* tab, const int* idx) +{ + return v_int32x4(vcombine_s32(vld1_s32(tab + idx[0]), vld1_s32(tab + idx[1]))); +} +inline v_int32x4 v_lut_quads(const int* tab, const int* idx) +{ + return v_int32x4(vld1q_s32(tab + idx[0])); +} +inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((int*)tab, idx)); } +inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((int*)tab, idx)); } +inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((int*)tab, idx)); } + +inline v_int64x2 v_lut(const int64_t* tab, const int* idx) +{ + return v_int64x2(vcombine_s64(vcreate_s64(tab[idx[0]]), vcreate_s64(tab[idx[1]]))); +} +inline v_int64x2 v_lut_pairs(const int64_t* tab, const int* idx) +{ + return v_int64x2(vld1q_s64(tab + idx[0])); +} +inline v_uint64x2 v_lut(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); } +inline v_uint64x2 v_lut_pairs(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); } + +inline v_float32x4 v_lut(const float* tab, const int* idx) +{ + float CV_DECL_ALIGNED(32) elems[4] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]] + }; + return v_float32x4(vld1q_f32(elems)); +} +inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) +{ + typedef uint64 CV_DECL_ALIGNED(1) unaligned_uint64; + + uint64 CV_DECL_ALIGNED(32) elems[2] = + { + *(unaligned_uint64*)(tab + idx[0]), + *(unaligned_uint64*)(tab + idx[1]) + }; + return v_float32x4(vreinterpretq_f32_u64(vld1q_u64(elems))); +} +inline v_float32x4 v_lut_quads(const float* tab, const int* idx) +{ + return v_float32x4(vld1q_f32(tab + idx[0])); +} + +inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) +{ + int CV_DECL_ALIGNED(32) elems[4] = + { + tab[vgetq_lane_s32(idxvec.val, 0)], + tab[vgetq_lane_s32(idxvec.val, 1)], + tab[vgetq_lane_s32(idxvec.val, 2)], + tab[vgetq_lane_s32(idxvec.val, 3)] + }; + return v_int32x4(vld1q_s32(elems)); +} + +inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) +{ + unsigned CV_DECL_ALIGNED(32) elems[4] = + { + tab[vgetq_lane_s32(idxvec.val, 0)], + tab[vgetq_lane_s32(idxvec.val, 1)], + tab[vgetq_lane_s32(idxvec.val, 2)], + tab[vgetq_lane_s32(idxvec.val, 3)] + }; + return v_uint32x4(vld1q_u32(elems)); +} + +inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) +{ + float CV_DECL_ALIGNED(32) elems[4] = + { + tab[vgetq_lane_s32(idxvec.val, 0)], + tab[vgetq_lane_s32(idxvec.val, 1)], + tab[vgetq_lane_s32(idxvec.val, 2)], + tab[vgetq_lane_s32(idxvec.val, 3)] + }; + return v_float32x4(vld1q_f32(elems)); +} + +inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y) +{ + /*int CV_DECL_ALIGNED(32) idx[4]; + v_store(idx, idxvec); + + float32x4_t xy02 = vcombine_f32(vld1_f32(tab + idx[0]), vld1_f32(tab + idx[2])); + float32x4_t xy13 = vcombine_f32(vld1_f32(tab + idx[1]), vld1_f32(tab + idx[3])); + + float32x4x2_t xxyy = vuzpq_f32(xy02, xy13); + x = v_float32x4(xxyy.val[0]); + y = v_float32x4(xxyy.val[1]);*/ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + + x = v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); + y = v_float32x4(tab[idx[0]+1], tab[idx[1]+1], tab[idx[2]+1], tab[idx[3]+1]); +} + +inline v_int8x16 v_interleave_pairs(const v_int8x16& vec) +{ + return v_int8x16(vcombine_s8(vtbl1_s8(vget_low_s8(vec.val), vcreate_s8(0x0705060403010200)), vtbl1_s8(vget_high_s8(vec.val), vcreate_s8(0x0705060403010200)))); +} +inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); } +inline v_int8x16 v_interleave_quads(const v_int8x16& vec) +{ + return v_int8x16(vcombine_s8(vtbl1_s8(vget_low_s8(vec.val), vcreate_s8(0x0703060205010400)), vtbl1_s8(vget_high_s8(vec.val), vcreate_s8(0x0703060205010400)))); +} +inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_interleave_pairs(const v_int16x8& vec) +{ + return v_int16x8(vreinterpretq_s16_s8(vcombine_s8(vtbl1_s8(vget_low_s8(vreinterpretq_s8_s16(vec.val)), vcreate_s8(0x0706030205040100)), vtbl1_s8(vget_high_s8(vreinterpretq_s8_s16(vec.val)), vcreate_s8(0x0706030205040100))))); +} +inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); } +inline v_int16x8 v_interleave_quads(const v_int16x8& vec) +{ + int16x4x2_t res = vzip_s16(vget_low_s16(vec.val), vget_high_s16(vec.val)); + return v_int16x8(vcombine_s16(res.val[0], res.val[1])); +} +inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); } + +inline v_int32x4 v_interleave_pairs(const v_int32x4& vec) +{ + int32x2x2_t res = vzip_s32(vget_low_s32(vec.val), vget_high_s32(vec.val)); + return v_int32x4(vcombine_s32(res.val[0], res.val[1])); +} +inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } +inline v_float32x4 v_interleave_pairs(const v_float32x4& vec) { return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } + +inline v_int8x16 v_pack_triplets(const v_int8x16& vec) +{ + return v_int8x16(vextq_s8(vcombine_s8(vtbl1_s8(vget_low_s8(vec.val), vcreate_s8(0x0605040201000000)), vtbl1_s8(vget_high_s8(vec.val), vcreate_s8(0x0807060504020100))), vdupq_n_s8(0), 2)); +} +inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_pack_triplets(const v_int16x8& vec) +{ + return v_int16x8(vreinterpretq_s16_s8(vextq_s8(vcombine_s8(vtbl1_s8(vget_low_s8(vreinterpretq_s8_s16(vec.val)), vcreate_s8(0x0504030201000000)), vget_high_s8(vreinterpretq_s8_s16(vec.val))), vdupq_n_s8(0), 2))); +} +inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); } + +inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; } +inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; } +inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; } + +#if CV_SIMD128_64F +inline v_float64x2 v_lut(const double* tab, const int* idx) +{ + double CV_DECL_ALIGNED(32) elems[2] = + { + tab[idx[0]], + tab[idx[1]] + }; + return v_float64x2(vld1q_f64(elems)); +} + +inline v_float64x2 v_lut_pairs(const double* tab, const int* idx) +{ + return v_float64x2(vld1q_f64(tab + idx[0])); +} + +inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) +{ + double CV_DECL_ALIGNED(32) elems[2] = + { + tab[vgetq_lane_s32(idxvec.val, 0)], + tab[vgetq_lane_s32(idxvec.val, 1)], + }; + return v_float64x2(vld1q_f64(elems)); +} + +inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + + x = v_float64x2(tab[idx[0]], tab[idx[1]]); + y = v_float64x2(tab[idx[0]+1], tab[idx[1]+1]); +} +#endif + +////// FP16 support /////// +#if CV_FP16 +inline v_float32x4 v_load_expand(const float16_t* ptr) +{ + float16x4_t v = + #ifndef vld1_f16 // APPLE compiler defines vld1_f16 as macro + (float16x4_t)vld1_s16((const short*)ptr); + #else + vld1_f16((const __fp16*)ptr); + #endif + return v_float32x4(vcvt_f32_f16(v)); +} + +inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +{ + float16x4_t hv = vcvt_f16_f32(v.val); + + #ifndef vst1_f16 // APPLE compiler defines vst1_f16 as macro + vst1_s16((short*)ptr, (int16x4_t)hv); + #else + vst1_f16((__fp16*)ptr, hv); + #endif +} +#else +inline v_float32x4 v_load_expand(const float16_t* ptr) +{ + const int N = 4; + float buf[N]; + for( int i = 0; i < N; i++ ) buf[i] = (float)ptr[i]; + return v_load(buf); +} + +inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +{ + const int N = 4; + float buf[N]; + v_store(buf, v); + for( int i = 0; i < N; i++ ) ptr[i] = float16_t(buf[i]); +} +#endif + +inline void v_cleanup() {} + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END + //! @endcond } diff --git a/IPL/include/opencv/opencv2/core/hal/intrin_sse.hpp b/IPL/include/opencv/opencv2/core/hal/intrin_sse.hpp index 1840e03..867ff55 100644 --- a/IPL/include/opencv/opencv2/core/hal/intrin_sse.hpp +++ b/IPL/include/opencv/opencv2/core/hal/intrin_sse.hpp @@ -42,25 +42,40 @@ // //M*/ -#ifndef __OPENCV_HAL_SSE_HPP__ -#define __OPENCV_HAL_SSE_HPP__ +#ifndef OPENCV_HAL_SSE_HPP +#define OPENCV_HAL_SSE_HPP #include +#include "opencv2/core/utility.hpp" #define CV_SIMD128 1 #define CV_SIMD128_64F 1 +#define CV_SIMD128_FP16 0 // no native operations with FP16 type. namespace cv { //! @cond IGNORED +// +// Compilation troubleshooting: +// - MSVC: error C2719: 'a': formal parameter with requested alignment of 16 won't be aligned +// Replace parameter declaration to const reference: +// -v_int32x4 a +// +const v_int32x4& a +// + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + +///////// Types //////////// + struct v_uint8x16 { typedef uchar lane_type; + typedef __m128i vector_type; enum { nlanes = 16 }; - v_uint8x16() {} + v_uint8x16() : val(_mm_setzero_si128()) {} explicit v_uint8x16(__m128i v) : val(v) {} v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7, uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15) @@ -81,9 +96,10 @@ struct v_uint8x16 struct v_int8x16 { typedef schar lane_type; + typedef __m128i vector_type; enum { nlanes = 16 }; - v_int8x16() {} + v_int8x16() : val(_mm_setzero_si128()) {} explicit v_int8x16(__m128i v) : val(v) {} v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7, schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15) @@ -104,9 +120,10 @@ struct v_int8x16 struct v_uint16x8 { typedef ushort lane_type; + typedef __m128i vector_type; enum { nlanes = 8 }; - v_uint16x8() {} + v_uint16x8() : val(_mm_setzero_si128()) {} explicit v_uint16x8(__m128i v) : val(v) {} v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7) { @@ -124,9 +141,10 @@ struct v_uint16x8 struct v_int16x8 { typedef short lane_type; + typedef __m128i vector_type; enum { nlanes = 8 }; - v_int16x8() {} + v_int16x8() : val(_mm_setzero_si128()) {} explicit v_int16x8(__m128i v) : val(v) {} v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7) { @@ -137,15 +155,17 @@ struct v_int16x8 { return (short)_mm_cvtsi128_si32(val); } + __m128i val; }; struct v_uint32x4 { typedef unsigned lane_type; + typedef __m128i vector_type; enum { nlanes = 4 }; - v_uint32x4() {} + v_uint32x4() : val(_mm_setzero_si128()) {} explicit v_uint32x4(__m128i v) : val(v) {} v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) { @@ -155,15 +175,17 @@ struct v_uint32x4 { return (unsigned)_mm_cvtsi128_si32(val); } + __m128i val; }; struct v_int32x4 { typedef int lane_type; + typedef __m128i vector_type; enum { nlanes = 4 }; - v_int32x4() {} + v_int32x4() : val(_mm_setzero_si128()) {} explicit v_int32x4(__m128i v) : val(v) {} v_int32x4(int v0, int v1, int v2, int v3) { @@ -173,15 +195,17 @@ struct v_int32x4 { return _mm_cvtsi128_si32(val); } + __m128i val; }; struct v_float32x4 { typedef float lane_type; + typedef __m128 vector_type; enum { nlanes = 4 }; - v_float32x4() {} + v_float32x4() : val(_mm_setzero_ps()) {} explicit v_float32x4(__m128 v) : val(v) {} v_float32x4(float v0, float v1, float v2, float v3) { @@ -191,15 +215,17 @@ struct v_float32x4 { return _mm_cvtss_f32(val); } + __m128 val; }; struct v_uint64x2 { typedef uint64 lane_type; + typedef __m128i vector_type; enum { nlanes = 2 }; - v_uint64x2() {} + v_uint64x2() : val(_mm_setzero_si128()) {} explicit v_uint64x2(__m128i v) : val(v) {} v_uint64x2(uint64 v0, uint64 v1) { @@ -207,19 +233,25 @@ struct v_uint64x2 } uint64 get0() const { + #if !defined(__x86_64__) && !defined(_M_X64) int a = _mm_cvtsi128_si32(val); int b = _mm_cvtsi128_si32(_mm_srli_epi64(val, 32)); return (unsigned)a | ((uint64)(unsigned)b << 32); + #else + return (uint64)_mm_cvtsi128_si64(val); + #endif } + __m128i val; }; struct v_int64x2 { typedef int64 lane_type; + typedef __m128i vector_type; enum { nlanes = 2 }; - v_int64x2() {} + v_int64x2() : val(_mm_setzero_si128()) {} explicit v_int64x2(__m128i v) : val(v) {} v_int64x2(int64 v0, int64 v1) { @@ -227,19 +259,25 @@ struct v_int64x2 } int64 get0() const { + #if !defined(__x86_64__) && !defined(_M_X64) int a = _mm_cvtsi128_si32(val); int b = _mm_cvtsi128_si32(_mm_srli_epi64(val, 32)); return (int64)((unsigned)a | ((uint64)(unsigned)b << 32)); + #else + return _mm_cvtsi128_si64(val); + #endif } + __m128i val; }; struct v_float64x2 { typedef double lane_type; + typedef __m128d vector_type; enum { nlanes = 2 }; - v_float64x2() {} + v_float64x2() : val(_mm_setzero_pd()) {} explicit v_float64x2(__m128d v) : val(v) {} v_float64x2(double v0, double v1) { @@ -249,17 +287,39 @@ struct v_float64x2 { return _mm_cvtsd_f64(val); } + __m128d val; }; +namespace hal_sse_internal +{ + template + to_sse_type v_sse_reinterpret_as(const from_sse_type& val); + +#define OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(to_sse_type, from_sse_type, sse_cast_intrin) \ + template<> inline \ + to_sse_type v_sse_reinterpret_as(const from_sse_type& a) \ + { return sse_cast_intrin(a); } + + OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128i, __m128i, OPENCV_HAL_NOP) + OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128i, __m128, _mm_castps_si128) + OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128i, __m128d, _mm_castpd_si128) + OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128, __m128i, _mm_castsi128_ps) + OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128, __m128, OPENCV_HAL_NOP) + OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128, __m128d, _mm_castpd_ps) + OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128d, __m128i, _mm_castsi128_pd) + OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128d, __m128, _mm_castps_pd) + OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128d, __m128d, OPENCV_HAL_NOP) +} + #define OPENCV_HAL_IMPL_SSE_INITVEC(_Tpvec, _Tp, suffix, zsuffix, ssuffix, _Tps, cast) \ inline _Tpvec v_setzero_##suffix() { return _Tpvec(_mm_setzero_##zsuffix()); } \ inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(_mm_set1_##ssuffix((_Tps)v)); } \ template inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0& a) \ { return _Tpvec(cast(a.val)); } -OPENCV_HAL_IMPL_SSE_INITVEC(v_uint8x16, uchar, u8, si128, epi8, char, OPENCV_HAL_NOP) -OPENCV_HAL_IMPL_SSE_INITVEC(v_int8x16, schar, s8, si128, epi8, char, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_SSE_INITVEC(v_uint8x16, uchar, u8, si128, epi8, schar, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_SSE_INITVEC(v_int8x16, schar, s8, si128, epi8, schar, OPENCV_HAL_NOP) OPENCV_HAL_IMPL_SSE_INITVEC(v_uint16x8, ushort, u16, si128, epi16, short, OPENCV_HAL_NOP) OPENCV_HAL_IMPL_SSE_INITVEC(v_int16x8, short, s16, si128, epi16, short, OPENCV_HAL_NOP) OPENCV_HAL_IMPL_SSE_INITVEC(v_uint32x4, unsigned, u32, si128, epi32, int, OPENCV_HAL_NOP) @@ -362,7 +422,7 @@ void v_rshr_pack_u_store(uchar* ptr, const v_int16x8& a) inline v_int8x16 v_pack(const v_int16x8& a, const v_int16x8& b) { return v_int8x16(_mm_packs_epi16(a.val, b.val)); } -inline void v_pack_store(schar* ptr, v_int16x8& a) +inline void v_pack_store(schar* ptr, const v_int16x8& a) { _mm_storel_epi64((__m128i*)ptr, _mm_packs_epi16(a.val, a.val)); } template inline @@ -383,20 +443,18 @@ void v_rshr_pack_store(schar* ptr, const v_int16x8& a) } -// bit-wise "mask ? a : b" +// byte-wise "mask ? a : b" inline __m128i v_select_si128(__m128i mask, __m128i a, __m128i b) { +#if CV_SSE4_1 + return _mm_blendv_epi8(b, a, mask); +#else return _mm_xor_si128(b, _mm_and_si128(_mm_xor_si128(a, b), mask)); +#endif } inline v_uint16x8 v_pack(const v_uint32x4& a, const v_uint32x4& b) -{ - __m128i z = _mm_setzero_si128(), maxval32 = _mm_set1_epi32(65535), delta32 = _mm_set1_epi32(32768); - __m128i a1 = _mm_sub_epi32(v_select_si128(_mm_cmpgt_epi32(z, a.val), maxval32, a.val), delta32); - __m128i b1 = _mm_sub_epi32(v_select_si128(_mm_cmpgt_epi32(z, b.val), maxval32, b.val), delta32); - __m128i r = _mm_packs_epi32(a1, b1); - return v_uint16x8(_mm_sub_epi16(r, _mm_set1_epi16(-32768))); -} +{ return v_uint16x8(_v128_packs_epu32(a.val, b.val)); } inline void v_pack_store(ushort* ptr, const v_uint32x4& a) { @@ -426,37 +484,62 @@ void v_rshr_pack_store(ushort* ptr, const v_uint32x4& a) inline v_uint16x8 v_pack_u(const v_int32x4& a, const v_int32x4& b) { +#if CV_SSE4_1 + return v_uint16x8(_mm_packus_epi32(a.val, b.val)); +#else __m128i delta32 = _mm_set1_epi32(32768); - __m128i r = _mm_packs_epi32(_mm_sub_epi32(a.val, delta32), _mm_sub_epi32(b.val, delta32)); + + // preliminary saturate negative values to zero + __m128i a1 = _mm_and_si128(a.val, _mm_cmpgt_epi32(a.val, _mm_set1_epi32(0))); + __m128i b1 = _mm_and_si128(b.val, _mm_cmpgt_epi32(b.val, _mm_set1_epi32(0))); + + __m128i r = _mm_packs_epi32(_mm_sub_epi32(a1, delta32), _mm_sub_epi32(b1, delta32)); return v_uint16x8(_mm_sub_epi16(r, _mm_set1_epi16(-32768))); +#endif } inline void v_pack_u_store(ushort* ptr, const v_int32x4& a) { +#if CV_SSE4_1 + _mm_storel_epi64((__m128i*)ptr, _mm_packus_epi32(a.val, a.val)); +#else __m128i delta32 = _mm_set1_epi32(32768); __m128i a1 = _mm_sub_epi32(a.val, delta32); __m128i r = _mm_sub_epi16(_mm_packs_epi32(a1, a1), _mm_set1_epi16(-32768)); _mm_storel_epi64((__m128i*)ptr, r); +#endif } template inline v_uint16x8 v_rshr_pack_u(const v_int32x4& a, const v_int32x4& b) { +#if CV_SSE4_1 + __m128i delta = _mm_set1_epi32(1 << (n - 1)); + return v_uint16x8(_mm_packus_epi32(_mm_srai_epi32(_mm_add_epi32(a.val, delta), n), + _mm_srai_epi32(_mm_add_epi32(b.val, delta), n))); +#else __m128i delta = _mm_set1_epi32(1 << (n-1)), delta32 = _mm_set1_epi32(32768); __m128i a1 = _mm_sub_epi32(_mm_srai_epi32(_mm_add_epi32(a.val, delta), n), delta32); __m128i a2 = _mm_sub_epi16(_mm_packs_epi32(a1, a1), _mm_set1_epi16(-32768)); __m128i b1 = _mm_sub_epi32(_mm_srai_epi32(_mm_add_epi32(b.val, delta), n), delta32); __m128i b2 = _mm_sub_epi16(_mm_packs_epi32(b1, b1), _mm_set1_epi16(-32768)); return v_uint16x8(_mm_unpacklo_epi64(a2, b2)); +#endif } template inline void v_rshr_pack_u_store(ushort* ptr, const v_int32x4& a) { +#if CV_SSE4_1 + __m128i delta = _mm_set1_epi32(1 << (n - 1)); + __m128i a1 = _mm_srai_epi32(_mm_add_epi32(a.val, delta), n); + _mm_storel_epi64((__m128i*)ptr, _mm_packus_epi32(a1, a1)); +#else __m128i delta = _mm_set1_epi32(1 << (n-1)), delta32 = _mm_set1_epi32(32768); __m128i a1 = _mm_sub_epi32(_mm_srai_epi32(_mm_add_epi32(a.val, delta), n), delta32); __m128i a2 = _mm_sub_epi16(_mm_packs_epi32(a1, a1), _mm_set1_epi16(-32768)); _mm_storel_epi64((__m128i*)ptr, a2); +#endif } inline v_int16x8 v_pack(const v_int32x4& a, const v_int32x4& b) @@ -567,6 +650,35 @@ void v_rshr_pack_store(int* ptr, const v_int64x2& a) _mm_storel_epi64((__m128i*)ptr, a2); } +// pack boolean +inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) +{ + __m128i ab = _mm_packs_epi16(a.val, b.val); + return v_uint8x16(ab); +} + +inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, const v_uint32x4& d) +{ + __m128i ab = _mm_packs_epi32(a.val, b.val); + __m128i cd = _mm_packs_epi32(c.val, d.val); + return v_uint8x16(_mm_packs_epi16(ab, cd)); +} + +inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, + const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f, + const v_uint64x2& g, const v_uint64x2& h) +{ + __m128i ab = _mm_packs_epi32(a.val, b.val); + __m128i cd = _mm_packs_epi32(c.val, d.val); + __m128i ef = _mm_packs_epi32(e.val, f.val); + __m128i gh = _mm_packs_epi32(g.val, h.val); + + __m128i abcd = _mm_packs_epi32(ab, cd); + __m128i efgh = _mm_packs_epi32(ef, gh); + return v_uint8x16(_mm_packs_epi16(abcd, efgh)); +} + inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, const v_float32x4& m1, const v_float32x4& m2, const v_float32x4& m3) @@ -579,6 +691,16 @@ inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, return v_float32x4(_mm_add_ps(_mm_add_ps(v0, v1), _mm_add_ps(v2, v3))); } +inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& a) +{ + __m128 v0 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(0, 0, 0, 0)), m0.val); + __m128 v1 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(1, 1, 1, 1)), m1.val); + __m128 v2 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(2, 2, 2, 2)), m2.val); + + return v_float32x4(_mm_add_ps(_mm_add_ps(v0, v1), _mm_add_ps(v2, a.val))); +} #define OPENCV_HAL_IMPL_SSE_BIN_OP(bin_op, _Tpvec, intrin) \ inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ @@ -597,14 +719,14 @@ OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int8x16, _mm_adds_epi8) OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int8x16, _mm_subs_epi8) OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_uint16x8, _mm_adds_epu16) OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_uint16x8, _mm_subs_epu16) -OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_uint16x8, _mm_mullo_epi16) OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int16x8, _mm_adds_epi16) OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int16x8, _mm_subs_epi16) -OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_int16x8, _mm_mullo_epi16) OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_uint32x4, _mm_add_epi32) OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_uint32x4, _mm_sub_epi32) +OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_uint32x4, _v128_mullo_epi32) OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int32x4, _mm_add_epi32) OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int32x4, _mm_sub_epi32) +OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_int32x4, _v128_mullo_epi32) OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_float32x4, _mm_add_ps) OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_float32x4, _mm_sub_ps) OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_float32x4, _mm_mul_ps) @@ -618,31 +740,41 @@ OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_uint64x2, _mm_sub_epi64) OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int64x2, _mm_add_epi64) OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int64x2, _mm_sub_epi64) -inline v_uint32x4 operator * (const v_uint32x4& a, const v_uint32x4& b) -{ - __m128i c0 = _mm_mul_epu32(a.val, b.val); - __m128i c1 = _mm_mul_epu32(_mm_srli_epi64(a.val, 32), _mm_srli_epi64(b.val, 32)); - __m128i d0 = _mm_unpacklo_epi32(c0, c1); - __m128i d1 = _mm_unpackhi_epi32(c0, c1); - return v_uint32x4(_mm_unpacklo_epi64(d0, d1)); -} -inline v_int32x4 operator * (const v_int32x4& a, const v_int32x4& b) -{ - __m128i c0 = _mm_mul_epu32(a.val, b.val); - __m128i c1 = _mm_mul_epu32(_mm_srli_epi64(a.val, 32), _mm_srli_epi64(b.val, 32)); - __m128i d0 = _mm_unpacklo_epi32(c0, c1); - __m128i d1 = _mm_unpackhi_epi32(c0, c1); - return v_int32x4(_mm_unpacklo_epi64(d0, d1)); -} -inline v_uint32x4& operator *= (v_uint32x4& a, const v_uint32x4& b) +// saturating multiply 8-bit, 16-bit +#define OPENCV_HAL_IMPL_SSE_MUL_SAT(_Tpvec, _Tpwvec) \ + inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \ + { \ + _Tpwvec c, d; \ + v_mul_expand(a, b, c, d); \ + return v_pack(c, d); \ + } \ + inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \ + { a = a * b; return a; } + +OPENCV_HAL_IMPL_SSE_MUL_SAT(v_uint8x16, v_uint16x8) +OPENCV_HAL_IMPL_SSE_MUL_SAT(v_int8x16, v_int16x8) +OPENCV_HAL_IMPL_SSE_MUL_SAT(v_uint16x8, v_uint32x4) +OPENCV_HAL_IMPL_SSE_MUL_SAT(v_int16x8, v_int32x4) + +// Multiply and expand +inline void v_mul_expand(const v_uint8x16& a, const v_uint8x16& b, + v_uint16x8& c, v_uint16x8& d) { - a = a * b; - return a; + v_uint16x8 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c = v_mul_wrap(a0, b0); + d = v_mul_wrap(a1, b1); } -inline v_int32x4& operator *= (v_int32x4& a, const v_int32x4& b) + +inline void v_mul_expand(const v_int8x16& a, const v_int8x16& b, + v_int16x8& c, v_int16x8& d) { - a = a * b; - return a; + v_int16x8 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c = v_mul_wrap(a0, b0); + d = v_mul_wrap(a1, b1); } inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b, @@ -672,10 +804,198 @@ inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b, d.val = _mm_unpackhi_epi64(c0, c1); } +inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) { return v_int16x8(_mm_mulhi_epi16(a.val, b.val)); } +inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) { return v_uint16x8(_mm_mulhi_epu16(a.val, b.val)); } + +//////// Dot Product //////// + +// 16 >> 32 inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b) +{ return v_int32x4(_mm_madd_epi16(a.val, b.val)); } +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ return v_dotprod(a, b) + c; } + +// 32 >> 64 +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b) +{ +#if CV_SSE4_1 + __m128i even = _mm_mul_epi32(a.val, b.val); + __m128i odd = _mm_mul_epi32(_mm_srli_epi64(a.val, 32), _mm_srli_epi64(b.val, 32)); + return v_int64x2(_mm_add_epi64(even, odd)); +#else + __m128i even_u = _mm_mul_epu32(a.val, b.val); + __m128i odd_u = _mm_mul_epu32(_mm_srli_epi64(a.val, 32), _mm_srli_epi64(b.val, 32)); + // convert unsigned to signed high multiplication (from: Agner Fog(veclib) and H S Warren: Hacker's delight, 2003, p. 132) + __m128i a_sign = _mm_srai_epi32(a.val, 31); + __m128i b_sign = _mm_srai_epi32(b.val, 31); + // |x * sign of x + __m128i axb = _mm_and_si128(a.val, b_sign); + __m128i bxa = _mm_and_si128(b.val, a_sign); + // sum of sign corrections + __m128i ssum = _mm_add_epi32(bxa, axb); + __m128i even_ssum = _mm_slli_epi64(ssum, 32); + __m128i odd_ssum = _mm_and_si128(ssum, _mm_set_epi32(-1, 0, -1, 0)); + // convert to signed and prod + return v_int64x2(_mm_add_epi64(_mm_sub_epi64(even_u, even_ssum), _mm_sub_epi64(odd_u, odd_ssum))); +#endif +} +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ return v_dotprod(a, b) + c; } + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b) +{ + __m128i a0 = _mm_srli_epi16(_mm_slli_si128(a.val, 1), 8); // even + __m128i a1 = _mm_srli_epi16(a.val, 8); // odd + __m128i b0 = _mm_srli_epi16(_mm_slli_si128(b.val, 1), 8); + __m128i b1 = _mm_srli_epi16(b.val, 8); + __m128i p0 = _mm_madd_epi16(a0, b0); + __m128i p1 = _mm_madd_epi16(a1, b1); + return v_uint32x4(_mm_add_epi32(p0, p1)); +} +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b) +{ + __m128i a0 = _mm_srai_epi16(_mm_slli_si128(a.val, 1), 8); // even + __m128i a1 = _mm_srai_epi16(a.val, 8); // odd + __m128i b0 = _mm_srai_epi16(_mm_slli_si128(b.val, 1), 8); + __m128i b1 = _mm_srai_epi16(b.val, 8); + __m128i p0 = _mm_madd_epi16(a0, b0); + __m128i p1 = _mm_madd_epi16(a1, b1); + return v_int32x4(_mm_add_epi32(p0, p1)); +} +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) +{ return v_dotprod_expand(a, b) + c; } + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b) +{ + v_uint32x4 c, d; + v_mul_expand(a, b, c, d); + + v_uint64x2 c0, c1, d0, d1; + v_expand(c, c0, c1); + v_expand(d, d0, d1); + + c0 += c1; d0 += d1; + return v_uint64x2(_mm_add_epi64( + _mm_unpacklo_epi64(c0.val, d0.val), + _mm_unpackhi_epi64(c0.val, d0.val) + )); +} +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b) +{ + v_int32x4 prod = v_dotprod(a, b); + v_int64x2 c, d; + v_expand(prod, c, d); + return v_int64x2(_mm_add_epi64( + _mm_unpacklo_epi64(c.val, d.val), + _mm_unpackhi_epi64(c.val, d.val) + )); +} +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +// 32 >> 64f +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b) +{ +#if CV_SSE4_1 + return v_cvt_f64(v_dotprod(a, b)); +#else + v_float64x2 c = v_cvt_f64(a) * v_cvt_f64(b); + v_float64x2 d = v_cvt_f64_high(a) * v_cvt_f64_high(b); + + return v_float64x2(_mm_add_pd( + _mm_unpacklo_pd(c.val, d.val), + _mm_unpackhi_pd(c.val, d.val) + )); +#endif +} +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +//////// Fast Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b) +{ return v_dotprod(a, b); } +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ return v_dotprod(a, b) + c; } + +// 32 >> 64 +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_dotprod(a, b); } +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ return v_dotprod_fast(a, b) + c; } + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b) +{ + __m128i a0 = v_expand_low(a).val; + __m128i a1 = v_expand_high(a).val; + __m128i b0 = v_expand_low(b).val; + __m128i b1 = v_expand_high(b).val; + __m128i p0 = _mm_madd_epi16(a0, b0); + __m128i p1 = _mm_madd_epi16(a1, b1); + return v_uint32x4(_mm_add_epi32(p0, p1)); +} +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b) +{ +#if CV_SSE4_1 + __m128i a0 = _mm_cvtepi8_epi16(a.val); + __m128i a1 = v_expand_high(a).val; + __m128i b0 = _mm_cvtepi8_epi16(b.val); + __m128i b1 = v_expand_high(b).val; + __m128i p0 = _mm_madd_epi16(a0, b0); + __m128i p1 = _mm_madd_epi16(a1, b1); + return v_int32x4(_mm_add_epi32(p0, p1)); +#else + return v_dotprod_expand(a, b); +#endif +} +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b) +{ + v_uint32x4 c, d; + v_mul_expand(a, b, c, d); + + v_uint64x2 c0, c1, d0, d1; + v_expand(c, c0, c1); + v_expand(d, d0, d1); + + c0 += c1; d0 += d1; + return c0 + d0; +} +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b) { - return v_int32x4(_mm_madd_epi16(a.val, b.val)); + v_int32x4 prod = v_dotprod(a, b); + v_int64x2 c, d; + v_expand(prod, c, d); + return c + d; } +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +// 32 >> 64f +v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c); +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_cvt_f64_high(a) * v_cvt_f64_high(b)); } +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_fma(v_cvt_f64_high(a), v_cvt_f64_high(b), c)); } #define OPENCV_HAL_IMPL_SSE_LOGIC_OP(_Tpvec, suffix, not_const) \ OPENCV_HAL_IMPL_SSE_BIN_OP(&, _Tpvec, _mm_and_##suffix) \ @@ -702,7 +1022,7 @@ inline v_float32x4 v_sqrt(const v_float32x4& x) inline v_float32x4 v_invsqrt(const v_float32x4& x) { - static const __m128 _0_5 = _mm_set1_ps(0.5f), _1_5 = _mm_set1_ps(1.5f); + const __m128 _0_5 = _mm_set1_ps(0.5f), _1_5 = _mm_set1_ps(1.5f); __m128 t = x.val; __m128 h = _mm_mul_ps(t, _0_5); t = _mm_rsqrt_ps(t); @@ -715,10 +1035,22 @@ inline v_float64x2 v_sqrt(const v_float64x2& x) inline v_float64x2 v_invsqrt(const v_float64x2& x) { - static const __m128d v_1 = _mm_set1_pd(1.); + const __m128d v_1 = _mm_set1_pd(1.); return v_float64x2(_mm_div_pd(v_1, _mm_sqrt_pd(x.val))); } +#define OPENCV_HAL_IMPL_SSE_ABS_INT_FUNC(_Tpuvec, _Tpsvec, func, suffix, subWidth) \ +inline _Tpuvec v_abs(const _Tpsvec& x) \ +{ return _Tpuvec(_mm_##func##_ep##suffix(x.val, _mm_sub_ep##subWidth(_mm_setzero_si128(), x.val))); } + +OPENCV_HAL_IMPL_SSE_ABS_INT_FUNC(v_uint8x16, v_int8x16, min, u8, i8) +OPENCV_HAL_IMPL_SSE_ABS_INT_FUNC(v_uint16x8, v_int16x8, max, i16, i16) +inline v_uint32x4 v_abs(const v_int32x4& x) +{ + __m128i s = _mm_srli_epi32(x.val, 31); + __m128i f = _mm_srai_epi32(x.val, 31); + return v_uint32x4(_mm_add_epi32(_mm_xor_si128(x.val, f), s)); +} inline v_float32x4 v_abs(const v_float32x4& x) { return v_float32x4(_mm_and_ps(x.val, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)))); } inline v_float64x2 v_abs(const v_float64x2& x) @@ -746,43 +1078,75 @@ OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_float64x2, v_max, _mm_max_pd) inline v_int8x16 v_min(const v_int8x16& a, const v_int8x16& b) { +#if CV_SSE4_1 + return v_int8x16(_mm_min_epi8(a.val, b.val)); +#else __m128i delta = _mm_set1_epi8((char)-128); return v_int8x16(_mm_xor_si128(delta, _mm_min_epu8(_mm_xor_si128(a.val, delta), _mm_xor_si128(b.val, delta)))); +#endif } inline v_int8x16 v_max(const v_int8x16& a, const v_int8x16& b) { +#if CV_SSE4_1 + return v_int8x16(_mm_max_epi8(a.val, b.val)); +#else __m128i delta = _mm_set1_epi8((char)-128); return v_int8x16(_mm_xor_si128(delta, _mm_max_epu8(_mm_xor_si128(a.val, delta), _mm_xor_si128(b.val, delta)))); +#endif } inline v_uint16x8 v_min(const v_uint16x8& a, const v_uint16x8& b) { +#if CV_SSE4_1 + return v_uint16x8(_mm_min_epu16(a.val, b.val)); +#else return v_uint16x8(_mm_subs_epu16(a.val, _mm_subs_epu16(a.val, b.val))); +#endif } inline v_uint16x8 v_max(const v_uint16x8& a, const v_uint16x8& b) { +#if CV_SSE4_1 + return v_uint16x8(_mm_max_epu16(a.val, b.val)); +#else return v_uint16x8(_mm_adds_epu16(_mm_subs_epu16(a.val, b.val), b.val)); +#endif } inline v_uint32x4 v_min(const v_uint32x4& a, const v_uint32x4& b) { +#if CV_SSE4_1 + return v_uint32x4(_mm_min_epu32(a.val, b.val)); +#else __m128i delta = _mm_set1_epi32((int)0x80000000); __m128i mask = _mm_cmpgt_epi32(_mm_xor_si128(a.val, delta), _mm_xor_si128(b.val, delta)); return v_uint32x4(v_select_si128(mask, b.val, a.val)); +#endif } inline v_uint32x4 v_max(const v_uint32x4& a, const v_uint32x4& b) { +#if CV_SSE4_1 + return v_uint32x4(_mm_max_epu32(a.val, b.val)); +#else __m128i delta = _mm_set1_epi32((int)0x80000000); __m128i mask = _mm_cmpgt_epi32(_mm_xor_si128(a.val, delta), _mm_xor_si128(b.val, delta)); return v_uint32x4(v_select_si128(mask, a.val, b.val)); +#endif } inline v_int32x4 v_min(const v_int32x4& a, const v_int32x4& b) { +#if CV_SSE4_1 + return v_int32x4(_mm_min_epi32(a.val, b.val)); +#else return v_int32x4(v_select_si128(_mm_cmpgt_epi32(a.val, b.val), b.val, a.val)); +#endif } inline v_int32x4 v_max(const v_int32x4& a, const v_int32x4& b) { +#if CV_SSE4_1 + return v_int32x4(_mm_max_epi32(a.val, b.val)); +#else return v_int32x4(v_select_si128(_mm_cmpgt_epi32(a.val, b.val), a.val, b.val)); +#endif } #define OPENCV_HAL_IMPL_SSE_INT_CMP_OP(_Tpuvec, _Tpsvec, suffix, sbit) \ @@ -864,6 +1228,29 @@ inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \ OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float32x4, ps) OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float64x2, pd) +#if CV_SSE4_1 +#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec) \ +inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(_mm_cmpeq_epi64(a.val, b.val)); } \ +inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ +{ return ~(a == b); } +#else +#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec) \ +inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ +{ __m128i cmp = _mm_cmpeq_epi32(a.val, b.val); \ + return _Tpvec(_mm_and_si128(cmp, _mm_shuffle_epi32(cmp, _MM_SHUFFLE(2, 3, 0, 1)))); } \ +inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ +{ return ~(a == b); } +#endif + +OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_uint64x2) +OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_int64x2) + +inline v_float32x4 v_not_nan(const v_float32x4& a) +{ return v_float32x4(_mm_cmpord_ps(a.val, a.val)); } +inline v_float64x2 v_not_nan(const v_float64x2& a) +{ return v_float64x2(_mm_cmpord_pd(a.val, a.val)); } + OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_add_wrap, _mm_add_epi8) OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int8x16, v_add_wrap, _mm_add_epi8) OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint16x8, v_add_wrap, _mm_add_epi16) @@ -872,33 +1259,86 @@ OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_sub_wrap, _mm_sub_epi8) OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int8x16, v_sub_wrap, _mm_sub_epi8) OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint16x8, v_sub_wrap, _mm_sub_epi16) OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int16x8, v_sub_wrap, _mm_sub_epi16) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint16x8, v_mul_wrap, _mm_mullo_epi16) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int16x8, v_mul_wrap, _mm_mullo_epi16) -#define OPENCV_HAL_IMPL_SSE_ABSDIFF_8_16(_Tpuvec, _Tpsvec, bits, smask32) \ -inline _Tpuvec v_absdiff(const _Tpuvec& a, const _Tpuvec& b) \ -{ \ - return _Tpuvec(_mm_add_epi##bits(_mm_subs_epu##bits(a.val, b.val), _mm_subs_epu##bits(b.val, a.val))); \ -} \ -inline _Tpuvec v_absdiff(const _Tpsvec& a, const _Tpsvec& b) \ -{ \ - __m128i smask = _mm_set1_epi32(smask32); \ - __m128i a1 = _mm_xor_si128(a.val, smask); \ - __m128i b1 = _mm_xor_si128(b.val, smask); \ - return _Tpuvec(_mm_add_epi##bits(_mm_subs_epu##bits(a1, b1), _mm_subs_epu##bits(b1, a1))); \ +inline v_uint8x16 v_mul_wrap(const v_uint8x16& a, const v_uint8x16& b) +{ + __m128i ad = _mm_srai_epi16(a.val, 8); + __m128i bd = _mm_srai_epi16(b.val, 8); + __m128i p0 = _mm_mullo_epi16(a.val, b.val); // even + __m128i p1 = _mm_slli_epi16(_mm_mullo_epi16(ad, bd), 8); // odd + const __m128i b01 = _mm_set1_epi32(0xFF00FF00); + return v_uint8x16(_v128_blendv_epi8(p0, p1, b01)); +} +inline v_int8x16 v_mul_wrap(const v_int8x16& a, const v_int8x16& b) +{ + return v_reinterpret_as_s8(v_mul_wrap(v_reinterpret_as_u8(a), v_reinterpret_as_u8(b))); } -OPENCV_HAL_IMPL_SSE_ABSDIFF_8_16(v_uint8x16, v_int8x16, 8, (int)0x80808080) -OPENCV_HAL_IMPL_SSE_ABSDIFF_8_16(v_uint16x8, v_int16x8, 16, (int)0x80008000) +/** Absolute difference **/ +inline v_uint8x16 v_absdiff(const v_uint8x16& a, const v_uint8x16& b) +{ return v_add_wrap(a - b, b - a); } +inline v_uint16x8 v_absdiff(const v_uint16x8& a, const v_uint16x8& b) +{ return v_add_wrap(a - b, b - a); } inline v_uint32x4 v_absdiff(const v_uint32x4& a, const v_uint32x4& b) +{ return v_max(a, b) - v_min(a, b); } + +inline v_uint8x16 v_absdiff(const v_int8x16& a, const v_int8x16& b) { - return v_max(a, b) - v_min(a, b); + v_int8x16 d = v_sub_wrap(a, b); + v_int8x16 m = a < b; + return v_reinterpret_as_u8(v_sub_wrap(d ^ m, m)); +} +inline v_uint16x8 v_absdiff(const v_int16x8& a, const v_int16x8& b) +{ + return v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b))); } - inline v_uint32x4 v_absdiff(const v_int32x4& a, const v_int32x4& b) { - __m128i d = _mm_sub_epi32(a.val, b.val); - __m128i m = _mm_cmpgt_epi32(b.val, a.val); - return v_uint32x4(_mm_sub_epi32(_mm_xor_si128(d, m), m)); + v_int32x4 d = a - b; + v_int32x4 m = a < b; + return v_reinterpret_as_u32((d ^ m) - m); +} + +/** Saturating absolute difference **/ +inline v_int8x16 v_absdiffs(const v_int8x16& a, const v_int8x16& b) +{ + v_int8x16 d = a - b; + v_int8x16 m = a < b; + return (d ^ m) - m; + } +inline v_int16x8 v_absdiffs(const v_int16x8& a, const v_int16x8& b) +{ return v_max(a, b) - v_min(a, b); } + + +inline v_int32x4 v_fma(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + return a * b + c; +} + +inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + return v_fma(a, b, c); +} + +inline v_float32x4 v_fma(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) +{ +#if CV_FMA3 + return v_float32x4(_mm_fmadd_ps(a.val, b.val, c.val)); +#else + return v_float32x4(_mm_add_ps(_mm_mul_ps(a.val, b.val), c.val)); +#endif +} + +inline v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) +{ +#if CV_FMA3 + return v_float64x2(_mm_fmadd_pd(a.val, b.val, c.val)); +#else + return v_float64x2(_mm_add_pd(_mm_mul_pd(a.val, b.val), c.val)); +#endif } #define OPENCV_HAL_IMPL_SSE_MISC_FLT_OP(_Tpvec, _Tp, _Tpreg, suffix, absmask_vec) \ @@ -909,17 +1349,16 @@ inline _Tpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \ } \ inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b) \ { \ - _Tpreg res = _mm_add_##suffix(_mm_mul_##suffix(a.val, a.val), _mm_mul_##suffix(b.val, b.val)); \ - return _Tpvec(_mm_sqrt_##suffix(res)); \ + _Tpvec res = v_fma(a, a, b*b); \ + return _Tpvec(_mm_sqrt_##suffix(res.val)); \ } \ inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b) \ { \ - _Tpreg res = _mm_add_##suffix(_mm_mul_##suffix(a.val, a.val), _mm_mul_##suffix(b.val, b.val)); \ - return _Tpvec(res); \ + return v_fma(a, a, b*b); \ } \ inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \ { \ - return _Tpvec(_mm_add_##suffix(_mm_mul_##suffix(a.val, b.val), c.val)); \ + return v_fma(a, b, c); \ } OPENCV_HAL_IMPL_SSE_MISC_FLT_OP(v_float32x4, float, __m128, ps, _mm_set1_epi32((int)0x7fffffff)) @@ -967,11 +1406,125 @@ OPENCV_HAL_IMPL_SSE_SHIFT_OP(v_uint16x8, v_int16x8, epi16, _mm_srai_epi16) OPENCV_HAL_IMPL_SSE_SHIFT_OP(v_uint32x4, v_int32x4, epi32, _mm_srai_epi32) OPENCV_HAL_IMPL_SSE_SHIFT_OP(v_uint64x2, v_int64x2, epi64, v_srai_epi64) +namespace hal_sse_internal +{ + template 16)), + bool is_first = (imm == 0), + bool is_half = (imm == 8), + bool is_second = (imm == 16), + bool is_other = (((imm > 0) && (imm < 8)) || ((imm > 8) && (imm < 16)))> + class v_sse_palignr_u8_class; + + template + class v_sse_palignr_u8_class; + + template + class v_sse_palignr_u8_class + { + public: + inline __m128i operator()(const __m128i& a, const __m128i&) const + { + return a; + } + }; + + template + class v_sse_palignr_u8_class + { + public: + inline __m128i operator()(const __m128i& a, const __m128i& b) const + { + return _mm_unpacklo_epi64(_mm_unpackhi_epi64(a, a), b); + } + }; + + template + class v_sse_palignr_u8_class + { + public: + inline __m128i operator()(const __m128i&, const __m128i& b) const + { + return b; + } + }; + + template + class v_sse_palignr_u8_class + { +#if CV_SSSE3 + public: + inline __m128i operator()(const __m128i& a, const __m128i& b) const + { + return _mm_alignr_epi8(b, a, imm); + } +#else + public: + inline __m128i operator()(const __m128i& a, const __m128i& b) const + { + enum { imm2 = (sizeof(__m128i) - imm) }; + return _mm_or_si128(_mm_srli_si128(a, imm), _mm_slli_si128(b, imm2)); + } +#endif + }; + + template + inline __m128i v_sse_palignr_u8(const __m128i& a, const __m128i& b) + { + CV_StaticAssert((imm >= 0) && (imm <= 16), "Invalid imm for v_sse_palignr_u8."); + return v_sse_palignr_u8_class()(a, b); + } +} + +template +inline _Tpvec v_rotate_right(const _Tpvec &a) +{ + using namespace hal_sse_internal; + enum { imm2 = (imm * sizeof(typename _Tpvec::lane_type)) }; + return _Tpvec(v_sse_reinterpret_as( + _mm_srli_si128( + v_sse_reinterpret_as<__m128i>(a.val), imm2))); +} + +template +inline _Tpvec v_rotate_left(const _Tpvec &a) +{ + using namespace hal_sse_internal; + enum { imm2 = (imm * sizeof(typename _Tpvec::lane_type)) }; + return _Tpvec(v_sse_reinterpret_as( + _mm_slli_si128( + v_sse_reinterpret_as<__m128i>(a.val), imm2))); +} + +template +inline _Tpvec v_rotate_right(const _Tpvec &a, const _Tpvec &b) +{ + using namespace hal_sse_internal; + enum { imm2 = (imm * sizeof(typename _Tpvec::lane_type)) }; + return _Tpvec(v_sse_reinterpret_as( + v_sse_palignr_u8( + v_sse_reinterpret_as<__m128i>(a.val), + v_sse_reinterpret_as<__m128i>(b.val)))); +} + +template +inline _Tpvec v_rotate_left(const _Tpvec &a, const _Tpvec &b) +{ + using namespace hal_sse_internal; + enum { imm2 = ((_Tpvec::nlanes - imm) * sizeof(typename _Tpvec::lane_type)) }; + return _Tpvec(v_sse_reinterpret_as( + v_sse_palignr_u8( + v_sse_reinterpret_as<__m128i>(b.val), + v_sse_reinterpret_as<__m128i>(a.val)))); +} + #define OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(_Tpvec, _Tp) \ inline _Tpvec v_load(const _Tp* ptr) \ { return _Tpvec(_mm_loadu_si128((const __m128i*)ptr)); } \ inline _Tpvec v_load_aligned(const _Tp* ptr) \ { return _Tpvec(_mm_load_si128((const __m128i*)ptr)); } \ +inline _Tpvec v_load_low(const _Tp* ptr) \ +{ return _Tpvec(_mm_loadl_epi64((const __m128i*)ptr)); } \ inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ { \ return _Tpvec(_mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i*)ptr0), \ @@ -981,6 +1534,17 @@ inline void v_store(_Tp* ptr, const _Tpvec& a) \ { _mm_storeu_si128((__m128i*)ptr, a.val); } \ inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ { _mm_store_si128((__m128i*)ptr, a.val); } \ +inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ +{ _mm_stream_si128((__m128i*)ptr, a.val); } \ +inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \ +{ \ + if( mode == hal::STORE_UNALIGNED ) \ + _mm_storeu_si128((__m128i*)ptr, a.val); \ + else if( mode == hal::STORE_ALIGNED_NOCACHE ) \ + _mm_stream_si128((__m128i*)ptr, a.val); \ + else \ + _mm_store_si128((__m128i*)ptr, a.val); \ +} \ inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ { _mm_storel_epi64((__m128i*)ptr, a.val); } \ inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ @@ -1000,6 +1564,8 @@ inline _Tpvec v_load(const _Tp* ptr) \ { return _Tpvec(_mm_loadu_##suffix(ptr)); } \ inline _Tpvec v_load_aligned(const _Tp* ptr) \ { return _Tpvec(_mm_load_##suffix(ptr)); } \ +inline _Tpvec v_load_low(const _Tp* ptr) \ +{ return _Tpvec(_mm_castsi128_##suffix(_mm_loadl_epi64((const __m128i*)ptr))); } \ inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ { \ return _Tpvec(_mm_castsi128_##suffix( \ @@ -1010,6 +1576,17 @@ inline void v_store(_Tp* ptr, const _Tpvec& a) \ { _mm_storeu_##suffix(ptr, a.val); } \ inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ { _mm_store_##suffix(ptr, a.val); } \ +inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ +{ _mm_stream_##suffix(ptr, a.val); } \ +inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \ +{ \ + if( mode == hal::STORE_UNALIGNED ) \ + _mm_storeu_##suffix(ptr, a.val); \ + else if( mode == hal::STORE_ALIGNED_NOCACHE ) \ + _mm_stream_##suffix(ptr, a.val); \ + else \ + _mm_store_##suffix(ptr, a.val); \ +} \ inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ { _mm_storel_epi64((__m128i*)ptr, _mm_cast##suffix##_si128(a.val)); } \ inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ @@ -1021,6 +1598,72 @@ inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(v_float32x4, float, ps) OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(v_float64x2, double, pd) +inline unsigned v_reduce_sum(const v_uint8x16& a) +{ + __m128i half = _mm_sad_epu8(a.val, _mm_setzero_si128()); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half))); +} +inline int v_reduce_sum(const v_int8x16& a) +{ + __m128i half = _mm_set1_epi8((schar)-128); + half = _mm_sad_epu8(_mm_xor_si128(a.val, half), _mm_setzero_si128()); + return _mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half))) - 2048; +} +#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_16(func) \ +inline schar v_reduce_##func(const v_int8x16& a) \ +{ \ + __m128i val = a.val; \ + __m128i smask = _mm_set1_epi8((schar)-128); \ + val = _mm_xor_si128(val, smask); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,8)); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,4)); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,2)); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,1)); \ + return (schar)_mm_cvtsi128_si32(val) ^ (schar)-128; \ +} \ +inline uchar v_reduce_##func(const v_uint8x16& a) \ +{ \ + __m128i val = a.val; \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,8)); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,4)); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,2)); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,1)); \ + return (uchar)_mm_cvtsi128_si32(val); \ +} +OPENCV_HAL_IMPL_SSE_REDUCE_OP_16(max) +OPENCV_HAL_IMPL_SSE_REDUCE_OP_16(min) + +#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_8(_Tpvec, scalartype, func, suffix, sbit) \ +inline scalartype v_reduce_##func(const v_##_Tpvec& a) \ +{ \ + __m128i val = a.val; \ + val = _mm_##func##_##suffix(val, _mm_srli_si128(val,8)); \ + val = _mm_##func##_##suffix(val, _mm_srli_si128(val,4)); \ + val = _mm_##func##_##suffix(val, _mm_srli_si128(val,2)); \ + return (scalartype)_mm_cvtsi128_si32(val); \ +} \ +inline unsigned scalartype v_reduce_##func(const v_u##_Tpvec& a) \ +{ \ + __m128i val = a.val; \ + __m128i smask = _mm_set1_epi16(sbit); \ + val = _mm_xor_si128(val, smask); \ + val = _mm_##func##_##suffix(val, _mm_srli_si128(val,8)); \ + val = _mm_##func##_##suffix(val, _mm_srli_si128(val,4)); \ + val = _mm_##func##_##suffix(val, _mm_srli_si128(val,2)); \ + return (unsigned scalartype)(_mm_cvtsi128_si32(val) ^ sbit); \ +} +OPENCV_HAL_IMPL_SSE_REDUCE_OP_8(int16x8, short, max, epi16, (short)-32768) +OPENCV_HAL_IMPL_SSE_REDUCE_OP_8(int16x8, short, min, epi16, (short)-32768) + +#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(_Tpvec, scalartype, regtype, suffix, cast_from, cast_to, extract) \ +inline scalartype v_reduce_sum(const _Tpvec& a) \ +{ \ + regtype val = a.val; \ + val = _mm_add_##suffix(val, cast_to(_mm_srli_si128(cast_from(val), 8))); \ + val = _mm_add_##suffix(val, cast_to(_mm_srli_si128(cast_from(val), 4))); \ + return (scalartype)_mm_cvt##extract(val); \ +} + #define OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(_Tpvec, scalartype, func, scalar_func) \ inline scalartype v_reduce_##func(const _Tpvec& a) \ { \ @@ -1031,41 +1674,178 @@ inline scalartype v_reduce_##func(const _Tpvec& a) \ return scalar_func(s0, s1); \ } -OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_uint32x4, unsigned, sum, OPENCV_HAL_ADD) +OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_uint32x4, unsigned, __m128i, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP, si128_si32) +OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_int32x4, int, __m128i, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP, si128_si32) +OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_float32x4, float, __m128, ps, _mm_castps_si128, _mm_castsi128_ps, ss_f32) + +inline int v_reduce_sum(const v_int16x8& a) +{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); } +inline unsigned v_reduce_sum(const v_uint16x8& a) +{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); } + +inline uint64 v_reduce_sum(const v_uint64x2& a) +{ + uint64 CV_DECL_ALIGNED(32) idx[2]; + v_store_aligned(idx, a); + return idx[0] + idx[1]; +} +inline int64 v_reduce_sum(const v_int64x2& a) +{ + int64 CV_DECL_ALIGNED(32) idx[2]; + v_store_aligned(idx, a); + return idx[0] + idx[1]; +} +inline double v_reduce_sum(const v_float64x2& a) +{ + double CV_DECL_ALIGNED(32) idx[2]; + v_store_aligned(idx, a); + return idx[0] + idx[1]; +} + +inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d) +{ +#if CV_SSE3 + __m128 ab = _mm_hadd_ps(a.val, b.val); + __m128 cd = _mm_hadd_ps(c.val, d.val); + return v_float32x4(_mm_hadd_ps(ab, cd)); +#else + __m128 ac = _mm_add_ps(_mm_unpacklo_ps(a.val, c.val), _mm_unpackhi_ps(a.val, c.val)); + __m128 bd = _mm_add_ps(_mm_unpacklo_ps(b.val, d.val), _mm_unpackhi_ps(b.val, d.val)); + return v_float32x4(_mm_add_ps(_mm_unpacklo_ps(ac, bd), _mm_unpackhi_ps(ac, bd))); +#endif +} + OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_uint32x4, unsigned, max, std::max) OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_uint32x4, unsigned, min, std::min) -OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_int32x4, int, sum, OPENCV_HAL_ADD) OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_int32x4, int, max, std::max) OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_int32x4, int, min, std::min) -OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_float32x4, float, sum, OPENCV_HAL_ADD) OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_float32x4, float, max, std::max) OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_float32x4, float, min, std::min) -#define OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(_Tpvec, suffix, pack_op, and_op, signmask, allmask) \ -inline int v_signmask(const _Tpvec& a) \ -{ \ - return and_op(_mm_movemask_##suffix(pack_op(a.val)), signmask); \ -} \ -inline bool v_check_all(const _Tpvec& a) \ -{ return and_op(_mm_movemask_##suffix(a.val), allmask) == allmask; } \ -inline bool v_check_any(const _Tpvec& a) \ -{ return and_op(_mm_movemask_##suffix(a.val), allmask) != 0; } +inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b) +{ + __m128i half = _mm_sad_epu8(a.val, b.val); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half))); +} +inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b) +{ + __m128i half = _mm_set1_epi8(0x7f); + half = _mm_sad_epu8(_mm_add_epi8(a.val, half), _mm_add_epi8(b.val, half)); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half))); +} +inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b) +{ + v_uint32x4 l, h; + v_expand(v_absdiff(a, b), l, h); + return v_reduce_sum(l + h); +} +inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b) +{ + v_uint32x4 l, h; + v_expand(v_absdiff(a, b), l, h); + return v_reduce_sum(l + h); +} +inline unsigned v_reduce_sad(const v_uint32x4& a, const v_uint32x4& b) +{ + return v_reduce_sum(v_absdiff(a, b)); +} +inline unsigned v_reduce_sad(const v_int32x4& a, const v_int32x4& b) +{ + return v_reduce_sum(v_absdiff(a, b)); +} +inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b) +{ + return v_reduce_sum(v_absdiff(a, b)); +} -#define OPENCV_HAL_PACKS(a) _mm_packs_epi16(a, a) -inline __m128i v_packq_epi32(__m128i a) +inline v_uint8x16 v_popcount(const v_uint8x16& a) +{ + __m128i m1 = _mm_set1_epi32(0x55555555); + __m128i m2 = _mm_set1_epi32(0x33333333); + __m128i m4 = _mm_set1_epi32(0x0f0f0f0f); + __m128i p = a.val; + p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 1), m1), _mm_and_si128(p, m1)); + p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 2), m2), _mm_and_si128(p, m2)); + p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 4), m4), _mm_and_si128(p, m4)); + return v_uint8x16(p); +} +inline v_uint16x8 v_popcount(const v_uint16x8& a) +{ + v_uint8x16 p = v_popcount(v_reinterpret_as_u8(a)); + p += v_rotate_right<1>(p); + return v_reinterpret_as_u16(p) & v_setall_u16(0x00ff); +} +inline v_uint32x4 v_popcount(const v_uint32x4& a) +{ + v_uint8x16 p = v_popcount(v_reinterpret_as_u8(a)); + p += v_rotate_right<1>(p); + p += v_rotate_right<2>(p); + return v_reinterpret_as_u32(p) & v_setall_u32(0x000000ff); +} +inline v_uint64x2 v_popcount(const v_uint64x2& a) { - __m128i b = _mm_packs_epi32(a, a); - return _mm_packs_epi16(b, b); + return v_uint64x2(_mm_sad_epu8(v_popcount(v_reinterpret_as_u8(a)).val, _mm_setzero_si128())); +} +inline v_uint8x16 v_popcount(const v_int8x16& a) +{ return v_popcount(v_reinterpret_as_u8(a)); } +inline v_uint16x8 v_popcount(const v_int16x8& a) +{ return v_popcount(v_reinterpret_as_u16(a)); } +inline v_uint32x4 v_popcount(const v_int32x4& a) +{ return v_popcount(v_reinterpret_as_u32(a)); } +inline v_uint64x2 v_popcount(const v_int64x2& a) +{ return v_popcount(v_reinterpret_as_u64(a)); } + +#define OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(_Tpvec, suffix, cast_op, allmask) \ +inline int v_signmask(const _Tpvec& a) { return _mm_movemask_##suffix(cast_op(a.val)); } \ +inline bool v_check_all(const _Tpvec& a) { return _mm_movemask_##suffix(cast_op(a.val)) == allmask; } \ +inline bool v_check_any(const _Tpvec& a) { return _mm_movemask_##suffix(cast_op(a.val)) != 0; } +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint8x16, epi8, OPENCV_HAL_NOP, 65535) +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int8x16, epi8, OPENCV_HAL_NOP, 65535) +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint32x4, ps, _mm_castsi128_ps, 15) +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int32x4, ps, _mm_castsi128_ps, 15) +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint64x2, pd, _mm_castsi128_pd, 3) +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int64x2, pd, _mm_castsi128_pd, 3) +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float32x4, ps, OPENCV_HAL_NOP, 15) +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float64x2, pd, OPENCV_HAL_NOP, 3) + +#define OPENCV_HAL_IMPL_SSE_CHECK_SIGNS_SHORT(_Tpvec) \ +inline int v_signmask(const _Tpvec& a) { return _mm_movemask_epi8(_mm_packs_epi16(a.val, a.val)) & 255; } \ +inline bool v_check_all(const _Tpvec& a) { return (_mm_movemask_epi8(a.val) & 0xaaaa) == 0xaaaa; } \ +inline bool v_check_any(const _Tpvec& a) { return (_mm_movemask_epi8(a.val) & 0xaaaa) != 0; } +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS_SHORT(v_uint16x8) +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS_SHORT(v_int16x8) + +inline int v_scan_forward(const v_int8x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); } +inline int v_scan_forward(const v_uint8x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); } +inline int v_scan_forward(const v_int16x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; } +inline int v_scan_forward(const v_uint16x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; } +inline int v_scan_forward(const v_int32x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; } +inline int v_scan_forward(const v_uint32x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; } +inline int v_scan_forward(const v_float32x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; } +inline int v_scan_forward(const v_int64x2& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } +inline int v_scan_forward(const v_uint64x2& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } +inline int v_scan_forward(const v_float64x2& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } + +#if CV_SSE4_1 +#define OPENCV_HAL_IMPL_SSE_SELECT(_Tpvec, cast_ret, cast, suffix) \ +inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(cast_ret(_mm_blendv_##suffix(cast(b.val), cast(a.val), cast(mask.val)))); \ } -OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint8x16, epi8, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 65535, 65535) -OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int8x16, epi8, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 65535, 65535) -OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint16x8, epi8, OPENCV_HAL_PACKS, OPENCV_HAL_AND, 255, (int)0xaaaa) -OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int16x8, epi8, OPENCV_HAL_PACKS, OPENCV_HAL_AND, 255, (int)0xaaaa) -OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint32x4, epi8, v_packq_epi32, OPENCV_HAL_AND, 15, (int)0x8888) -OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int32x4, epi8, v_packq_epi32, OPENCV_HAL_AND, 15, (int)0x8888) -OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float32x4, ps, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 15, 15) -OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float64x2, pd, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 3, 3) +OPENCV_HAL_IMPL_SSE_SELECT(v_uint8x16, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8) +OPENCV_HAL_IMPL_SSE_SELECT(v_int8x16, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8) +OPENCV_HAL_IMPL_SSE_SELECT(v_uint16x8, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8) +OPENCV_HAL_IMPL_SSE_SELECT(v_int16x8, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8) +OPENCV_HAL_IMPL_SSE_SELECT(v_uint32x4, _mm_castps_si128, _mm_castsi128_ps, ps) +OPENCV_HAL_IMPL_SSE_SELECT(v_int32x4, _mm_castps_si128, _mm_castsi128_ps, ps) +// OPENCV_HAL_IMPL_SSE_SELECT(v_uint64x2, TBD, TBD, pd) +// OPENCV_HAL_IMPL_SSE_SELECT(v_int64x2, TBD, TBD, ps) +OPENCV_HAL_IMPL_SSE_SELECT(v_float32x4, OPENCV_HAL_NOP, OPENCV_HAL_NOP, ps) +OPENCV_HAL_IMPL_SSE_SELECT(v_float64x2, OPENCV_HAL_NOP, OPENCV_HAL_NOP, pd) + +#else // CV_SSE4_1 #define OPENCV_HAL_IMPL_SSE_SELECT(_Tpvec, suffix) \ inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \ @@ -1083,71 +1863,41 @@ OPENCV_HAL_IMPL_SSE_SELECT(v_int32x4, si128) // OPENCV_HAL_IMPL_SSE_SELECT(v_int64x2, si128) OPENCV_HAL_IMPL_SSE_SELECT(v_float32x4, ps) OPENCV_HAL_IMPL_SSE_SELECT(v_float64x2, pd) +#endif -#define OPENCV_HAL_IMPL_SSE_EXPAND(_Tpuvec, _Tpwuvec, _Tpu, _Tpsvec, _Tpwsvec, _Tps, suffix, wsuffix, shift) \ -inline void v_expand(const _Tpuvec& a, _Tpwuvec& b0, _Tpwuvec& b1) \ -{ \ - __m128i z = _mm_setzero_si128(); \ - b0.val = _mm_unpacklo_##suffix(a.val, z); \ - b1.val = _mm_unpackhi_##suffix(a.val, z); \ -} \ -inline _Tpwuvec v_load_expand(const _Tpu* ptr) \ -{ \ - __m128i z = _mm_setzero_si128(); \ - return _Tpwuvec(_mm_unpacklo_##suffix(_mm_loadl_epi64((const __m128i*)ptr), z)); \ -} \ -inline void v_expand(const _Tpsvec& a, _Tpwsvec& b0, _Tpwsvec& b1) \ -{ \ - b0.val = _mm_srai_##wsuffix(_mm_unpacklo_##suffix(a.val, a.val), shift); \ - b1.val = _mm_srai_##wsuffix(_mm_unpackhi_##suffix(a.val, a.val), shift); \ -} \ -inline _Tpwsvec v_load_expand(const _Tps* ptr) \ -{ \ - __m128i a = _mm_loadl_epi64((const __m128i*)ptr); \ - return _Tpwsvec(_mm_srai_##wsuffix(_mm_unpacklo_##suffix(a, a), shift)); \ -} - -OPENCV_HAL_IMPL_SSE_EXPAND(v_uint8x16, v_uint16x8, uchar, v_int8x16, v_int16x8, schar, epi8, epi16, 8) -OPENCV_HAL_IMPL_SSE_EXPAND(v_uint16x8, v_uint32x4, ushort, v_int16x8, v_int32x4, short, epi16, epi32, 16) - -inline void v_expand(const v_uint32x4& a, v_uint64x2& b0, v_uint64x2& b1) -{ - __m128i z = _mm_setzero_si128(); - b0.val = _mm_unpacklo_epi32(a.val, z); - b1.val = _mm_unpackhi_epi32(a.val, z); -} -inline v_uint64x2 v_load_expand(const unsigned* ptr) -{ - __m128i z = _mm_setzero_si128(); - return v_uint64x2(_mm_unpacklo_epi32(_mm_loadl_epi64((const __m128i*)ptr), z)); -} -inline void v_expand(const v_int32x4& a, v_int64x2& b0, v_int64x2& b1) -{ - __m128i s = _mm_srai_epi32(a.val, 31); - b0.val = _mm_unpacklo_epi32(a.val, s); - b1.val = _mm_unpackhi_epi32(a.val, s); -} -inline v_int64x2 v_load_expand(const int* ptr) -{ - __m128i a = _mm_loadl_epi64((const __m128i*)ptr); - __m128i s = _mm_srai_epi32(a, 31); - return v_int64x2(_mm_unpacklo_epi32(a, s)); -} +/* Expand */ +#define OPENCV_HAL_IMPL_SSE_EXPAND(_Tpvec, _Tpwvec, _Tp, intrin) \ + inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ + { \ + b0.val = intrin(a.val); \ + b1.val = __CV_CAT(intrin, _high)(a.val); \ + } \ + inline _Tpwvec v_expand_low(const _Tpvec& a) \ + { return _Tpwvec(intrin(a.val)); } \ + inline _Tpwvec v_expand_high(const _Tpvec& a) \ + { return _Tpwvec(__CV_CAT(intrin, _high)(a.val)); } \ + inline _Tpwvec v_load_expand(const _Tp* ptr) \ + { \ + __m128i a = _mm_loadl_epi64((const __m128i*)ptr); \ + return _Tpwvec(intrin(a)); \ + } -inline v_uint32x4 v_load_expand_q(const uchar* ptr) -{ - __m128i z = _mm_setzero_si128(); - __m128i a = _mm_cvtsi32_si128(*(const int*)ptr); - return v_uint32x4(_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z)); -} +OPENCV_HAL_IMPL_SSE_EXPAND(v_uint8x16, v_uint16x8, uchar, _v128_cvtepu8_epi16) +OPENCV_HAL_IMPL_SSE_EXPAND(v_int8x16, v_int16x8, schar, _v128_cvtepi8_epi16) +OPENCV_HAL_IMPL_SSE_EXPAND(v_uint16x8, v_uint32x4, ushort, _v128_cvtepu16_epi32) +OPENCV_HAL_IMPL_SSE_EXPAND(v_int16x8, v_int32x4, short, _v128_cvtepi16_epi32) +OPENCV_HAL_IMPL_SSE_EXPAND(v_uint32x4, v_uint64x2, unsigned, _v128_cvtepu32_epi64) +OPENCV_HAL_IMPL_SSE_EXPAND(v_int32x4, v_int64x2, int, _v128_cvtepi32_epi64) + +#define OPENCV_HAL_IMPL_SSE_EXPAND_Q(_Tpvec, _Tp, intrin) \ + inline _Tpvec v_load_expand_q(const _Tp* ptr) \ + { \ + __m128i a = _mm_cvtsi32_si128(*(const int*)ptr); \ + return _Tpvec(intrin(a)); \ + } -inline v_int32x4 v_load_expand_q(const schar* ptr) -{ - __m128i a = _mm_cvtsi32_si128(*(const int*)ptr); - a = _mm_unpacklo_epi8(a, a); - a = _mm_unpacklo_epi8(a, a); - return v_int32x4(_mm_srai_epi32(a, 24)); -} +OPENCV_HAL_IMPL_SSE_EXPAND_Q(v_uint32x4, uchar, _v128_cvtepu8_epi32) +OPENCV_HAL_IMPL_SSE_EXPAND_Q(v_int32x4, schar, _v128_cvtepi8_epi32) #define OPENCV_HAL_IMPL_SSE_UNPACKS(_Tpvec, suffix, cast_from, cast_to) \ inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1) \ @@ -1181,15 +1931,63 @@ OPENCV_HAL_IMPL_SSE_UNPACKS(v_int32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP) OPENCV_HAL_IMPL_SSE_UNPACKS(v_float32x4, ps, _mm_castps_si128, _mm_castsi128_ps) OPENCV_HAL_IMPL_SSE_UNPACKS(v_float64x2, pd, _mm_castpd_si128, _mm_castsi128_pd) +inline v_uint8x16 v_reverse(const v_uint8x16 &a) +{ +#if CV_SSSE3 + static const __m128i perm = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return v_uint8x16(_mm_shuffle_epi8(a.val, perm)); +#else + uchar CV_DECL_ALIGNED(32) d[16]; + v_store_aligned(d, a); + return v_uint8x16(d[15], d[14], d[13], d[12], d[11], d[10], d[9], d[8], d[7], d[6], d[5], d[4], d[3], d[2], d[1], d[0]); +#endif +} + +inline v_int8x16 v_reverse(const v_int8x16 &a) +{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); } + +inline v_uint16x8 v_reverse(const v_uint16x8 &a) +{ +#if CV_SSSE3 + static const __m128i perm = _mm_setr_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1); + return v_uint16x8(_mm_shuffle_epi8(a.val, perm)); +#else + __m128i r = _mm_shuffle_epi32(a.val, _MM_SHUFFLE(0, 1, 2, 3)); + r = _mm_shufflelo_epi16(r, _MM_SHUFFLE(2, 3, 0, 1)); + r = _mm_shufflehi_epi16(r, _MM_SHUFFLE(2, 3, 0, 1)); + return v_uint16x8(r); +#endif +} + +inline v_int16x8 v_reverse(const v_int16x8 &a) +{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } + +inline v_uint32x4 v_reverse(const v_uint32x4 &a) +{ + return v_uint32x4(_mm_shuffle_epi32(a.val, _MM_SHUFFLE(0, 1, 2, 3))); +} + +inline v_int32x4 v_reverse(const v_int32x4 &a) +{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_float32x4 v_reverse(const v_float32x4 &a) +{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_uint64x2 v_reverse(const v_uint64x2 &a) +{ + return v_uint64x2(_mm_shuffle_epi32(a.val, _MM_SHUFFLE(1, 0, 3, 2))); +} + +inline v_int64x2 v_reverse(const v_int64x2 &a) +{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } + +inline v_float64x2 v_reverse(const v_float64x2 &a) +{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } + template inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) { - const int w = sizeof(typename _Tpvec::lane_type); - const int n = _Tpvec::nlanes; - __m128i ra, rb; - ra = _mm_srli_si128(a.val, s*w); - rb = _mm_slli_si128(b.val, (n-s)*w); - return _Tpvec(_mm_or_si128(ra, rb)); + return v_rotate_right(a, b); } inline v_int32x4 v_round(const v_float32x4& a) @@ -1215,6 +2013,12 @@ inline v_int32x4 v_trunc(const v_float32x4& a) inline v_int32x4 v_round(const v_float64x2& a) { return v_int32x4(_mm_cvtpd_epi32(a.val)); } +inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) +{ + __m128i ai = _mm_cvtpd_epi32(a.val), bi = _mm_cvtpd_epi32(b.val); + return v_int32x4(_mm_unpacklo_epi64(ai, bi)); +} + inline v_int32x4 v_floor(const v_float64x2& a) { __m128i a1 = _mm_cvtpd_epi32(a.val); @@ -1255,12 +2059,70 @@ OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(v_uint32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_N OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(v_int32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP) OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(v_float32x4, ps, _mm_castps_si128, _mm_castsi128_ps) -// adopted from sse_utils.hpp -inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c) +// load deinterleave +inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b) { __m128i t00 = _mm_loadu_si128((const __m128i*)ptr); __m128i t01 = _mm_loadu_si128((const __m128i*)(ptr + 16)); - __m128i t02 = _mm_loadu_si128((const __m128i*)(ptr + 32)); + + __m128i t10 = _mm_unpacklo_epi8(t00, t01); + __m128i t11 = _mm_unpackhi_epi8(t00, t01); + + __m128i t20 = _mm_unpacklo_epi8(t10, t11); + __m128i t21 = _mm_unpackhi_epi8(t10, t11); + + __m128i t30 = _mm_unpacklo_epi8(t20, t21); + __m128i t31 = _mm_unpackhi_epi8(t20, t21); + + a.val = _mm_unpacklo_epi8(t30, t31); + b.val = _mm_unpackhi_epi8(t30, t31); +} + +inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c) +{ +#if CV_SSE4_1 + const __m128i m0 = _mm_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0); + const __m128i m1 = _mm_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0); + __m128i s0 = _mm_loadu_si128((const __m128i*)ptr); + __m128i s1 = _mm_loadu_si128((const __m128i*)(ptr + 16)); + __m128i s2 = _mm_loadu_si128((const __m128i*)(ptr + 32)); + __m128i a0 = _mm_blendv_epi8(_mm_blendv_epi8(s0, s1, m0), s2, m1); + __m128i b0 = _mm_blendv_epi8(_mm_blendv_epi8(s1, s2, m0), s0, m1); + __m128i c0 = _mm_blendv_epi8(_mm_blendv_epi8(s2, s0, m0), s1, m1); + const __m128i sh_b = _mm_setr_epi8(0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13); + const __m128i sh_g = _mm_setr_epi8(1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14); + const __m128i sh_r = _mm_setr_epi8(2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15); + a0 = _mm_shuffle_epi8(a0, sh_b); + b0 = _mm_shuffle_epi8(b0, sh_g); + c0 = _mm_shuffle_epi8(c0, sh_r); + a.val = a0; + b.val = b0; + c.val = c0; +#elif CV_SSSE3 + const __m128i m0 = _mm_setr_epi8(0, 3, 6, 9, 12, 15, 1, 4, 7, 10, 13, 2, 5, 8, 11, 14); + const __m128i m1 = _mm_alignr_epi8(m0, m0, 11); + const __m128i m2 = _mm_alignr_epi8(m0, m0, 6); + + __m128i t0 = _mm_loadu_si128((const __m128i*)ptr); + __m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 16)); + __m128i t2 = _mm_loadu_si128((const __m128i*)(ptr + 32)); + + __m128i s0 = _mm_shuffle_epi8(t0, m0); + __m128i s1 = _mm_shuffle_epi8(t1, m1); + __m128i s2 = _mm_shuffle_epi8(t2, m2); + + t0 = _mm_alignr_epi8(s1, _mm_slli_si128(s0, 10), 5); + a.val = _mm_alignr_epi8(s2, t0, 5); + + t1 = _mm_alignr_epi8(_mm_srli_si128(s1, 5), _mm_slli_si128(s0, 5), 6); + b.val = _mm_alignr_epi8(_mm_srli_si128(s2, 5), t1, 5); + + t2 = _mm_alignr_epi8(_mm_srli_si128(s2, 10), s1, 11); + c.val = _mm_alignr_epi8(t2, s0, 11); +#else + __m128i t00 = _mm_loadu_si128((const __m128i*)ptr); + __m128i t01 = _mm_loadu_si128((const __m128i*)(ptr + 16)); + __m128i t02 = _mm_loadu_si128((const __m128i*)(ptr + 32)); __m128i t10 = _mm_unpacklo_epi8(t00, _mm_unpackhi_epi64(t01, t01)); __m128i t11 = _mm_unpacklo_epi8(_mm_unpackhi_epi64(t00, t00), t02); @@ -1277,6 +2139,7 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, a.val = _mm_unpacklo_epi8(t30, _mm_unpackhi_epi64(t31, t31)); b.val = _mm_unpacklo_epi8(_mm_unpackhi_epi64(t30, t30), t32); c.val = _mm_unpacklo_epi8(t31, _mm_unpackhi_epi64(t32, t32)); +#endif } inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c, v_uint8x16& d) @@ -1307,8 +2170,41 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, d.val = _mm_unpackhi_epi8(v2, v3); } +inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b) +{ + __m128i v0 = _mm_loadu_si128((__m128i*)(ptr)); // a0 b0 a1 b1 a2 b2 a3 b3 + __m128i v1 = _mm_loadu_si128((__m128i*)(ptr + 8)); // a4 b4 a5 b5 a6 b6 a7 b7 + + __m128i v2 = _mm_unpacklo_epi16(v0, v1); // a0 a4 b0 b4 a1 a5 b1 b5 + __m128i v3 = _mm_unpackhi_epi16(v0, v1); // a2 a6 b2 b6 a3 a7 b3 b7 + __m128i v4 = _mm_unpacklo_epi16(v2, v3); // a0 a2 a4 a6 b0 b2 b4 b6 + __m128i v5 = _mm_unpackhi_epi16(v2, v3); // a1 a3 a5 a7 b1 b3 b5 b7 + + a.val = _mm_unpacklo_epi16(v4, v5); // a0 a1 a2 a3 a4 a5 a6 a7 + b.val = _mm_unpackhi_epi16(v4, v5); // b0 b1 ab b3 b4 b5 b6 b7 +} + inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c) { +#if CV_SSE4_1 + __m128i v0 = _mm_loadu_si128((__m128i*)(ptr)); + __m128i v1 = _mm_loadu_si128((__m128i*)(ptr + 8)); + __m128i v2 = _mm_loadu_si128((__m128i*)(ptr + 16)); + __m128i a0 = _mm_blend_epi16(_mm_blend_epi16(v0, v1, 0x92), v2, 0x24); + __m128i b0 = _mm_blend_epi16(_mm_blend_epi16(v2, v0, 0x92), v1, 0x24); + __m128i c0 = _mm_blend_epi16(_mm_blend_epi16(v1, v2, 0x92), v0, 0x24); + + const __m128i sh_a = _mm_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11); + const __m128i sh_b = _mm_setr_epi8(2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13); + const __m128i sh_c = _mm_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15); + a0 = _mm_shuffle_epi8(a0, sh_a); + b0 = _mm_shuffle_epi8(b0, sh_b); + c0 = _mm_shuffle_epi8(c0, sh_c); + + a.val = a0; + b.val = b0; + c.val = c0; +#else __m128i t00 = _mm_loadu_si128((const __m128i*)ptr); __m128i t01 = _mm_loadu_si128((const __m128i*)(ptr + 8)); __m128i t02 = _mm_loadu_si128((const __m128i*)(ptr + 16)); @@ -1324,6 +2220,7 @@ inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, a.val = _mm_unpacklo_epi16(t20, _mm_unpackhi_epi64(t21, t21)); b.val = _mm_unpacklo_epi16(_mm_unpackhi_epi64(t20, t20), t22); c.val = _mm_unpacklo_epi16(t21, _mm_unpackhi_epi64(t22, t22)); +#endif } inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c, v_uint16x8& d) @@ -1349,6 +2246,18 @@ inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, d.val = _mm_unpackhi_epi16(u2, u3); } +inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b) +{ + __m128i v0 = _mm_loadu_si128((__m128i*)(ptr)); // a0 b0 a1 b1 + __m128i v1 = _mm_loadu_si128((__m128i*)(ptr + 4)); // a2 b2 a3 b3 + + __m128i v2 = _mm_unpacklo_epi32(v0, v1); // a0 a2 b0 b2 + __m128i v3 = _mm_unpackhi_epi32(v0, v1); // a1 a3 b1 b3 + + a.val = _mm_unpacklo_epi32(v2, v3); // a0 a1 a2 a3 + b.val = _mm_unpackhi_epi32(v2, v3); // b0 b1 ab b3 +} + inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c) { __m128i t00 = _mm_loadu_si128((const __m128i*)ptr); @@ -1366,17 +2275,150 @@ inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c, v_uint32x4& d) { - v_uint32x4 u0(_mm_loadu_si128((const __m128i*)ptr)); // a0 b0 c0 d0 - v_uint32x4 u1(_mm_loadu_si128((const __m128i*)(ptr + 4))); // a1 b1 c1 d1 - v_uint32x4 u2(_mm_loadu_si128((const __m128i*)(ptr + 8))); // a2 b2 c2 d2 - v_uint32x4 u3(_mm_loadu_si128((const __m128i*)(ptr + 12))); // a3 b3 c3 d3 + v_uint32x4 s0(_mm_loadu_si128((const __m128i*)ptr)); // a0 b0 c0 d0 + v_uint32x4 s1(_mm_loadu_si128((const __m128i*)(ptr + 4))); // a1 b1 c1 d1 + v_uint32x4 s2(_mm_loadu_si128((const __m128i*)(ptr + 8))); // a2 b2 c2 d2 + v_uint32x4 s3(_mm_loadu_si128((const __m128i*)(ptr + 12))); // a3 b3 c3 d3 + + v_transpose4x4(s0, s1, s2, s3, a, b, c, d); +} + +inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b) +{ + __m128 u0 = _mm_loadu_ps(ptr); // a0 b0 a1 b1 + __m128 u1 = _mm_loadu_ps((ptr + 4)); // a2 b2 a3 b3 - v_transpose4x4(u0, u1, u2, u3, a, b, c, d); + a.val = _mm_shuffle_ps(u0, u1, _MM_SHUFFLE(2, 0, 2, 0)); // a0 a1 a2 a3 + b.val = _mm_shuffle_ps(u0, u1, _MM_SHUFFLE(3, 1, 3, 1)); // b0 b1 ab b3 } +inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c) +{ + __m128 t0 = _mm_loadu_ps(ptr + 0); + __m128 t1 = _mm_loadu_ps(ptr + 4); + __m128 t2 = _mm_loadu_ps(ptr + 8); + + __m128 at12 = _mm_shuffle_ps(t1, t2, _MM_SHUFFLE(0, 1, 0, 2)); + a.val = _mm_shuffle_ps(t0, at12, _MM_SHUFFLE(2, 0, 3, 0)); + + __m128 bt01 = _mm_shuffle_ps(t0, t1, _MM_SHUFFLE(0, 0, 0, 1)); + __m128 bt12 = _mm_shuffle_ps(t1, t2, _MM_SHUFFLE(0, 2, 0, 3)); + b.val = _mm_shuffle_ps(bt01, bt12, _MM_SHUFFLE(2, 0, 2, 0)); + + __m128 ct01 = _mm_shuffle_ps(t0, t1, _MM_SHUFFLE(0, 1, 0, 2)); + c.val = _mm_shuffle_ps(ct01, t2, _MM_SHUFFLE(3, 0, 2, 0)); +} + +inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c, v_float32x4& d) +{ + __m128 t0 = _mm_loadu_ps(ptr + 0); + __m128 t1 = _mm_loadu_ps(ptr + 4); + __m128 t2 = _mm_loadu_ps(ptr + 8); + __m128 t3 = _mm_loadu_ps(ptr + 12); + __m128 t02lo = _mm_unpacklo_ps(t0, t2); + __m128 t13lo = _mm_unpacklo_ps(t1, t3); + __m128 t02hi = _mm_unpackhi_ps(t0, t2); + __m128 t13hi = _mm_unpackhi_ps(t1, t3); + a.val = _mm_unpacklo_ps(t02lo, t13lo); + b.val = _mm_unpackhi_ps(t02lo, t13lo); + c.val = _mm_unpacklo_ps(t02hi, t13hi); + d.val = _mm_unpackhi_ps(t02hi, t13hi); +} + +inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b) +{ + __m128i t0 = _mm_loadu_si128((const __m128i*)ptr); + __m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 2)); + + a = v_uint64x2(_mm_unpacklo_epi64(t0, t1)); + b = v_uint64x2(_mm_unpackhi_epi64(t0, t1)); +} + +inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b, v_uint64x2& c) +{ + __m128i t0 = _mm_loadu_si128((const __m128i*)ptr); // a0, b0 + __m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 2)); // c0, a1 + __m128i t2 = _mm_loadu_si128((const __m128i*)(ptr + 4)); // b1, c1 + + t1 = _mm_shuffle_epi32(t1, 0x4e); // a1, c0 + + a = v_uint64x2(_mm_unpacklo_epi64(t0, t1)); + b = v_uint64x2(_mm_unpacklo_epi64(_mm_unpackhi_epi64(t0, t0), t2)); + c = v_uint64x2(_mm_unpackhi_epi64(t1, t2)); +} + +inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, + v_uint64x2& b, v_uint64x2& c, v_uint64x2& d) +{ + __m128i t0 = _mm_loadu_si128((const __m128i*)ptr); // a0 b0 + __m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 2)); // c0 d0 + __m128i t2 = _mm_loadu_si128((const __m128i*)(ptr + 4)); // a1 b1 + __m128i t3 = _mm_loadu_si128((const __m128i*)(ptr + 6)); // c1 d1 + + a = v_uint64x2(_mm_unpacklo_epi64(t0, t2)); + b = v_uint64x2(_mm_unpackhi_epi64(t0, t2)); + c = v_uint64x2(_mm_unpacklo_epi64(t1, t3)); + d = v_uint64x2(_mm_unpackhi_epi64(t1, t3)); +} + +// store interleave + inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b, - const v_uint8x16& c ) + hal::StoreMode mode = hal::STORE_UNALIGNED) { + __m128i v0 = _mm_unpacklo_epi8(a.val, b.val); + __m128i v1 = _mm_unpackhi_epi8(a.val, b.val); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 16), v1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 16), v1); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 16), v1); + } +} + +inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b, + const v_uint8x16& c, hal::StoreMode mode = hal::STORE_UNALIGNED) +{ +#if CV_SSE4_1 + const __m128i sh_a = _mm_setr_epi8(0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5); + const __m128i sh_b = _mm_setr_epi8(5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10); + const __m128i sh_c = _mm_setr_epi8(10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15); + __m128i a0 = _mm_shuffle_epi8(a.val, sh_a); + __m128i b0 = _mm_shuffle_epi8(b.val, sh_b); + __m128i c0 = _mm_shuffle_epi8(c.val, sh_c); + + const __m128i m0 = _mm_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0); + const __m128i m1 = _mm_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0); + __m128i v0 = _mm_blendv_epi8(_mm_blendv_epi8(a0, b0, m1), c0, m0); + __m128i v1 = _mm_blendv_epi8(_mm_blendv_epi8(b0, c0, m1), a0, m0); + __m128i v2 = _mm_blendv_epi8(_mm_blendv_epi8(c0, a0, m1), b0, m0); +#elif CV_SSSE3 + const __m128i m0 = _mm_setr_epi8(0, 6, 11, 1, 7, 12, 2, 8, 13, 3, 9, 14, 4, 10, 15, 5); + const __m128i m1 = _mm_setr_epi8(5, 11, 0, 6, 12, 1, 7, 13, 2, 8, 14, 3, 9, 15, 4, 10); + const __m128i m2 = _mm_setr_epi8(10, 0, 5, 11, 1, 6, 12, 2, 7, 13, 3, 8, 14, 4, 9, 15); + + __m128i t0 = _mm_alignr_epi8(b.val, _mm_slli_si128(a.val, 10), 5); + t0 = _mm_alignr_epi8(c.val, t0, 5); + __m128i v0 = _mm_shuffle_epi8(t0, m0); + + __m128i t1 = _mm_alignr_epi8(_mm_srli_si128(b.val, 5), _mm_slli_si128(a.val, 5), 6); + t1 = _mm_alignr_epi8(_mm_srli_si128(c.val, 5), t1, 5); + __m128i v1 = _mm_shuffle_epi8(t1, m1); + + __m128i t2 = _mm_alignr_epi8(_mm_srli_si128(c.val, 10), b.val, 11); + t2 = _mm_alignr_epi8(t2, a.val, 11); + __m128i v2 = _mm_shuffle_epi8(t2, m2); +#else __m128i z = _mm_setzero_si128(); __m128i ab0 = _mm_unpacklo_epi8(a.val, b.val); __m128i ab1 = _mm_unpackhi_epi8(a.val, b.val); @@ -1414,14 +2456,31 @@ inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x1 __m128i v0 = _mm_or_si128(_mm_srli_si128(p40, 2), _mm_slli_si128(p41, 10)); __m128i v1 = _mm_or_si128(_mm_srli_si128(p41, 6), _mm_slli_si128(p42, 6)); __m128i v2 = _mm_or_si128(_mm_srli_si128(p42, 10), _mm_slli_si128(p43, 2)); +#endif - _mm_storeu_si128((__m128i*)(ptr), v0); - _mm_storeu_si128((__m128i*)(ptr + 16), v1); - _mm_storeu_si128((__m128i*)(ptr + 32), v2); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 16), v1); + _mm_stream_si128((__m128i*)(ptr + 32), v2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 16), v1); + _mm_store_si128((__m128i*)(ptr + 32), v2); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 16), v1); + _mm_storeu_si128((__m128i*)(ptr + 32), v2); + } } inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b, - const v_uint8x16& c, const v_uint8x16& d) + const v_uint8x16& c, const v_uint8x16& d, + hal::StoreMode mode = hal::STORE_UNALIGNED) { // a0 a1 a2 a3 .... // b0 b1 b2 b3 .... @@ -1433,20 +2492,72 @@ inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x1 __m128i u3 = _mm_unpackhi_epi8(b.val, d.val); // b8 d8 b9 d9 ... __m128i v0 = _mm_unpacklo_epi8(u0, u2); // a0 b0 c0 d0 ... - __m128i v1 = _mm_unpacklo_epi8(u1, u3); // a8 b8 c8 d8 ... - __m128i v2 = _mm_unpackhi_epi8(u0, u2); // a4 b4 c4 d4 ... + __m128i v1 = _mm_unpackhi_epi8(u0, u2); // a4 b4 c4 d4 ... + __m128i v2 = _mm_unpacklo_epi8(u1, u3); // a8 b8 c8 d8 ... __m128i v3 = _mm_unpackhi_epi8(u1, u3); // a12 b12 c12 d12 ... - _mm_storeu_si128((__m128i*)ptr, v0); - _mm_storeu_si128((__m128i*)(ptr + 16), v2); - _mm_storeu_si128((__m128i*)(ptr + 32), v1); - _mm_storeu_si128((__m128i*)(ptr + 48), v3); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 16), v1); + _mm_stream_si128((__m128i*)(ptr + 32), v2); + _mm_stream_si128((__m128i*)(ptr + 48), v3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 16), v1); + _mm_store_si128((__m128i*)(ptr + 32), v2); + _mm_store_si128((__m128i*)(ptr + 48), v3); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 16), v1); + _mm_storeu_si128((__m128i*)(ptr + 32), v2); + _mm_storeu_si128((__m128i*)(ptr + 48), v3); + } } -inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, - const v_uint16x8& b, - const v_uint16x8& c ) +inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b, + hal::StoreMode mode = hal::STORE_UNALIGNED) { + __m128i v0 = _mm_unpacklo_epi16(a.val, b.val); + __m128i v1 = _mm_unpackhi_epi16(a.val, b.val); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 8), v1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 8), v1); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 8), v1); + } +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, + const v_uint16x8& b, const v_uint16x8& c, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ +#if CV_SSE4_1 + const __m128i sh_a = _mm_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11); + const __m128i sh_b = _mm_setr_epi8(10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5); + const __m128i sh_c = _mm_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15); + __m128i a0 = _mm_shuffle_epi8(a.val, sh_a); + __m128i b0 = _mm_shuffle_epi8(b.val, sh_b); + __m128i c0 = _mm_shuffle_epi8(c.val, sh_c); + + __m128i v0 = _mm_blend_epi16(_mm_blend_epi16(a0, b0, 0x92), c0, 0x24); + __m128i v1 = _mm_blend_epi16(_mm_blend_epi16(c0, a0, 0x92), b0, 0x24); + __m128i v2 = _mm_blend_epi16(_mm_blend_epi16(b0, c0, 0x92), a0, 0x24); +#else __m128i z = _mm_setzero_si128(); __m128i ab0 = _mm_unpacklo_epi16(a.val, b.val); __m128i ab1 = _mm_unpackhi_epi16(a.val, b.val); @@ -1474,14 +2585,30 @@ inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, __m128i v0 = _mm_or_si128(_mm_srli_si128(p30, 2), _mm_slli_si128(p31, 10)); __m128i v1 = _mm_or_si128(_mm_srli_si128(p31, 6), _mm_slli_si128(p32, 6)); __m128i v2 = _mm_or_si128(_mm_srli_si128(p32, 10), _mm_slli_si128(p33, 2)); - - _mm_storeu_si128((__m128i*)(ptr), v0); - _mm_storeu_si128((__m128i*)(ptr + 8), v1); - _mm_storeu_si128((__m128i*)(ptr + 16), v2); +#endif + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 8), v1); + _mm_stream_si128((__m128i*)(ptr + 16), v2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 8), v1); + _mm_store_si128((__m128i*)(ptr + 16), v2); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 8), v1); + _mm_storeu_si128((__m128i*)(ptr + 16), v2); + } } inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b, - const v_uint16x8& c, const v_uint16x8& d) + const v_uint16x8& c, const v_uint16x8& d, + hal::StoreMode mode = hal::STORE_UNALIGNED) { // a0 a1 a2 a3 .... // b0 b1 b2 b3 .... @@ -1493,18 +2620,58 @@ inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16 __m128i u3 = _mm_unpackhi_epi16(b.val, d.val); // b4 d4 b5 d5 ... __m128i v0 = _mm_unpacklo_epi16(u0, u2); // a0 b0 c0 d0 ... - __m128i v1 = _mm_unpacklo_epi16(u1, u3); // a4 b4 c4 d4 ... - __m128i v2 = _mm_unpackhi_epi16(u0, u2); // a2 b2 c2 d2 ... + __m128i v1 = _mm_unpackhi_epi16(u0, u2); // a2 b2 c2 d2 ... + __m128i v2 = _mm_unpacklo_epi16(u1, u3); // a4 b4 c4 d4 ... __m128i v3 = _mm_unpackhi_epi16(u1, u3); // a6 b6 c6 d6 ... - _mm_storeu_si128((__m128i*)ptr, v0); - _mm_storeu_si128((__m128i*)(ptr + 8), v2); - _mm_storeu_si128((__m128i*)(ptr + 16), v1); - _mm_storeu_si128((__m128i*)(ptr + 24), v3); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 8), v1); + _mm_stream_si128((__m128i*)(ptr + 16), v2); + _mm_stream_si128((__m128i*)(ptr + 24), v3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 8), v1); + _mm_store_si128((__m128i*)(ptr + 16), v2); + _mm_store_si128((__m128i*)(ptr + 24), v3); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 8), v1); + _mm_storeu_si128((__m128i*)(ptr + 16), v2); + _mm_storeu_si128((__m128i*)(ptr + 24), v3); + } +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + __m128i v0 = _mm_unpacklo_epi32(a.val, b.val); + __m128i v1 = _mm_unpackhi_epi32(a.val, b.val); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 4), v1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 4), v1); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 4), v1); + } } inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b, - const v_uint32x4& c ) + const v_uint32x4& c, hal::StoreMode mode = hal::STORE_UNALIGNED) { v_uint32x4 z = v_setzero_u32(), u0, u1, u2, u3; v_transpose4x4(a, b, c, z, u0, u1, u2, u3); @@ -1513,64 +2680,287 @@ inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint __m128i v1 = _mm_or_si128(_mm_srli_si128(u1.val, 4), _mm_slli_si128(u2.val, 8)); __m128i v2 = _mm_or_si128(_mm_srli_si128(u2.val, 8), _mm_slli_si128(u3.val, 4)); - _mm_storeu_si128((__m128i*)ptr, v0); - _mm_storeu_si128((__m128i*)(ptr + 4), v1); - _mm_storeu_si128((__m128i*)(ptr + 8), v2); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 4), v1); + _mm_stream_si128((__m128i*)(ptr + 8), v2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 4), v1); + _mm_store_si128((__m128i*)(ptr + 8), v2); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 4), v1); + _mm_storeu_si128((__m128i*)(ptr + 8), v2); + } } inline void v_store_interleave(unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b, - const v_uint32x4& c, const v_uint32x4& d) + const v_uint32x4& c, const v_uint32x4& d, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + v_uint32x4 v0, v1, v2, v3; + v_transpose4x4(a, b, c, d, v0, v1, v2, v3); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0.val); + _mm_stream_si128((__m128i*)(ptr + 4), v1.val); + _mm_stream_si128((__m128i*)(ptr + 8), v2.val); + _mm_stream_si128((__m128i*)(ptr + 12), v3.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0.val); + _mm_store_si128((__m128i*)(ptr + 4), v1.val); + _mm_store_si128((__m128i*)(ptr + 8), v2.val); + _mm_store_si128((__m128i*)(ptr + 12), v3.val); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0.val); + _mm_storeu_si128((__m128i*)(ptr + 4), v1.val); + _mm_storeu_si128((__m128i*)(ptr + 8), v2.val); + _mm_storeu_si128((__m128i*)(ptr + 12), v3.val); + } +} + +// 2-channel, float only +inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + __m128 v0 = _mm_unpacklo_ps(a.val, b.val); // a0 b0 a1 b1 + __m128 v1 = _mm_unpackhi_ps(a.val, b.val); // a2 b2 a3 b3 + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_ps(ptr, v0); + _mm_stream_ps(ptr + 4, v1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_ps(ptr, v0); + _mm_store_ps(ptr + 4, v1); + } + else + { + _mm_storeu_ps(ptr, v0); + _mm_storeu_ps(ptr + 4, v1); + } +} + +inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + __m128 u0 = _mm_shuffle_ps(a.val, b.val, _MM_SHUFFLE(0, 0, 0, 0)); + __m128 u1 = _mm_shuffle_ps(c.val, a.val, _MM_SHUFFLE(1, 1, 0, 0)); + __m128 v0 = _mm_shuffle_ps(u0, u1, _MM_SHUFFLE(2, 0, 2, 0)); + __m128 u2 = _mm_shuffle_ps(b.val, c.val, _MM_SHUFFLE(1, 1, 1, 1)); + __m128 u3 = _mm_shuffle_ps(a.val, b.val, _MM_SHUFFLE(2, 2, 2, 2)); + __m128 v1 = _mm_shuffle_ps(u2, u3, _MM_SHUFFLE(2, 0, 2, 0)); + __m128 u4 = _mm_shuffle_ps(c.val, a.val, _MM_SHUFFLE(3, 3, 2, 2)); + __m128 u5 = _mm_shuffle_ps(b.val, c.val, _MM_SHUFFLE(3, 3, 3, 3)); + __m128 v2 = _mm_shuffle_ps(u4, u5, _MM_SHUFFLE(2, 0, 2, 0)); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_ps(ptr, v0); + _mm_stream_ps(ptr + 4, v1); + _mm_stream_ps(ptr + 8, v2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_ps(ptr, v0); + _mm_store_ps(ptr + 4, v1); + _mm_store_ps(ptr + 8, v2); + } + else + { + _mm_storeu_ps(ptr, v0); + _mm_storeu_ps(ptr + 4, v1); + _mm_storeu_ps(ptr + 8, v2); + } +} + +inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + __m128 u0 = _mm_unpacklo_ps(a.val, c.val); + __m128 u1 = _mm_unpacklo_ps(b.val, d.val); + __m128 u2 = _mm_unpackhi_ps(a.val, c.val); + __m128 u3 = _mm_unpackhi_ps(b.val, d.val); + __m128 v0 = _mm_unpacklo_ps(u0, u1); + __m128 v2 = _mm_unpacklo_ps(u2, u3); + __m128 v1 = _mm_unpackhi_ps(u0, u1); + __m128 v3 = _mm_unpackhi_ps(u2, u3); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_ps(ptr, v0); + _mm_stream_ps(ptr + 4, v1); + _mm_stream_ps(ptr + 8, v2); + _mm_stream_ps(ptr + 12, v3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_ps(ptr, v0); + _mm_store_ps(ptr + 4, v1); + _mm_store_ps(ptr + 8, v2); + _mm_store_ps(ptr + 12, v3); + } + else + { + _mm_storeu_ps(ptr, v0); + _mm_storeu_ps(ptr + 4, v1); + _mm_storeu_ps(ptr + 8, v2); + _mm_storeu_ps(ptr + 12, v3); + } +} + +inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + __m128i v0 = _mm_unpacklo_epi64(a.val, b.val); + __m128i v1 = _mm_unpackhi_epi64(a.val, b.val); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 2), v1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 2), v1); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 2), v1); + } +} + +inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, + const v_uint64x2& c, hal::StoreMode mode = hal::STORE_UNALIGNED) { - v_uint32x4 t0, t1, t2, t3; - v_transpose4x4(a, b, c, d, t0, t1, t2, t3); - v_store(ptr, t0); - v_store(ptr + 4, t1); - v_store(ptr + 8, t2); - v_store(ptr + 12, t3); + __m128i v0 = _mm_unpacklo_epi64(a.val, b.val); + __m128i v1 = _mm_unpacklo_epi64(c.val, _mm_unpackhi_epi64(a.val, a.val)); + __m128i v2 = _mm_unpackhi_epi64(b.val, c.val); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 2), v1); + _mm_stream_si128((__m128i*)(ptr + 4), v2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 2), v1); + _mm_store_si128((__m128i*)(ptr + 4), v2); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 2), v1); + _mm_storeu_si128((__m128i*)(ptr + 4), v2); + } } -#define OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(_Tpvec, _Tp, suffix, _Tpuvec, _Tpu, usuffix) \ -inline void v_load_deinterleave( const _Tp* ptr, _Tpvec& a0, \ - _Tpvec& b0, _Tpvec& c0 ) \ +inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, + const v_uint64x2& c, const v_uint64x2& d, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + __m128i v0 = _mm_unpacklo_epi64(a.val, b.val); + __m128i v1 = _mm_unpacklo_epi64(c.val, d.val); + __m128i v2 = _mm_unpackhi_epi64(a.val, b.val); + __m128i v3 = _mm_unpackhi_epi64(c.val, d.val); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 2), v1); + _mm_stream_si128((__m128i*)(ptr + 4), v2); + _mm_stream_si128((__m128i*)(ptr + 6), v3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 2), v1); + _mm_store_si128((__m128i*)(ptr + 4), v2); + _mm_store_si128((__m128i*)(ptr + 6), v3); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 2), v1); + _mm_storeu_si128((__m128i*)(ptr + 4), v2); + _mm_storeu_si128((__m128i*)(ptr + 6), v3); + } +} + +#define OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1) \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0 ) \ +{ \ + _Tpvec1 a1, b1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ +} \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0 ) \ +{ \ + _Tpvec1 a1, b1, c1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ + c0 = v_reinterpret_as_##suffix0(c1); \ +} \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0, _Tpvec0& d0 ) \ { \ - _Tpuvec a1, b1, c1; \ - v_load_deinterleave((const _Tpu*)ptr, a1, b1, c1); \ - a0 = v_reinterpret_as_##suffix(a1); \ - b0 = v_reinterpret_as_##suffix(b1); \ - c0 = v_reinterpret_as_##suffix(c1); \ + _Tpvec1 a1, b1, c1, d1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1, d1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ + c0 = v_reinterpret_as_##suffix0(c1); \ + d0 = v_reinterpret_as_##suffix0(d1); \ } \ -inline void v_load_deinterleave( const _Tp* ptr, _Tpvec& a0, \ - _Tpvec& b0, _Tpvec& c0, _Tpvec& d0 ) \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + hal::StoreMode mode = hal::STORE_UNALIGNED ) \ { \ - _Tpuvec a1, b1, c1, d1; \ - v_load_deinterleave((const _Tpu*)ptr, a1, b1, c1, d1); \ - a0 = v_reinterpret_as_##suffix(a1); \ - b0 = v_reinterpret_as_##suffix(b1); \ - c0 = v_reinterpret_as_##suffix(c1); \ - d0 = v_reinterpret_as_##suffix(d1); \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, mode); \ } \ -inline void v_store_interleave( _Tp* ptr, const _Tpvec& a0, \ - const _Tpvec& b0, const _Tpvec& c0 ) \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + const _Tpvec0& c0, hal::StoreMode mode = hal::STORE_UNALIGNED ) \ { \ - _Tpuvec a1 = v_reinterpret_as_##usuffix(a0); \ - _Tpuvec b1 = v_reinterpret_as_##usuffix(b0); \ - _Tpuvec c1 = v_reinterpret_as_##usuffix(c0); \ - v_store_interleave((_Tpu*)ptr, a1, b1, c1); \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, mode); \ } \ -inline void v_store_interleave( _Tp* ptr, const _Tpvec& a0, const _Tpvec& b0, \ - const _Tpvec& c0, const _Tpvec& d0 ) \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + const _Tpvec0& c0, const _Tpvec0& d0, \ + hal::StoreMode mode = hal::STORE_UNALIGNED ) \ { \ - _Tpuvec a1 = v_reinterpret_as_##usuffix(a0); \ - _Tpuvec b1 = v_reinterpret_as_##usuffix(b0); \ - _Tpuvec c1 = v_reinterpret_as_##usuffix(c0); \ - _Tpuvec d1 = v_reinterpret_as_##usuffix(d0); \ - v_store_interleave((_Tpu*)ptr, a1, b1, c1, d1); \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ + _Tpvec1 d1 = v_reinterpret_as_##suffix1(d0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1, mode); \ } OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int8x16, schar, s8, v_uint8x16, uchar, u8) OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int16x8, short, s16, v_uint16x8, ushort, u16) OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int32x4, int, s32, v_uint32x4, unsigned, u32) -OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_float32x4, float, f32, v_uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int64x2, int64, s64, v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_float64x2, double, f64, v_uint64x2, uint64, u64) inline v_float32x4 v_cvt_f32(const v_int32x4& a) { @@ -1582,16 +2972,462 @@ inline v_float32x4 v_cvt_f32(const v_float64x2& a) return v_float32x4(_mm_cvtpd_ps(a.val)); } +inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) +{ + return v_float32x4(_mm_movelh_ps(_mm_cvtpd_ps(a.val), _mm_cvtpd_ps(b.val))); +} + inline v_float64x2 v_cvt_f64(const v_int32x4& a) { return v_float64x2(_mm_cvtepi32_pd(a.val)); } +inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) +{ + return v_float64x2(_mm_cvtepi32_pd(_mm_srli_si128(a.val,8))); +} + inline v_float64x2 v_cvt_f64(const v_float32x4& a) { return v_float64x2(_mm_cvtps_pd(a.val)); } +inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) +{ + return v_float64x2(_mm_cvtps_pd(_mm_movehl_ps(a.val, a.val))); +} + +// from (Mysticial and wim) https://stackoverflow.com/q/41144668 +inline v_float64x2 v_cvt_f64(const v_int64x2& v) +{ + // constants encoded as floating-point + __m128i magic_i_hi32 = _mm_set1_epi64x(0x4530000080000000); // 2^84 + 2^63 + __m128i magic_i_all = _mm_set1_epi64x(0x4530000080100000); // 2^84 + 2^63 + 2^52 + __m128d magic_d_all = _mm_castsi128_pd(magic_i_all); + // Blend the 32 lowest significant bits of v with magic_int_lo +#if CV_SSE4_1 + __m128i magic_i_lo = _mm_set1_epi64x(0x4330000000000000); // 2^52 + __m128i v_lo = _mm_blend_epi16(v.val, magic_i_lo, 0xcc); +#else + __m128i magic_i_lo = _mm_set1_epi32(0x43300000); // 2^52 + __m128i v_lo = _mm_unpacklo_epi32(_mm_shuffle_epi32(v.val, _MM_SHUFFLE(0, 0, 2, 0)), magic_i_lo); +#endif + // Extract the 32 most significant bits of v + __m128i v_hi = _mm_srli_epi64(v.val, 32); + // Flip the msb of v_hi and blend with 0x45300000 + v_hi = _mm_xor_si128(v_hi, magic_i_hi32); + // Compute in double precision + __m128d v_hi_dbl = _mm_sub_pd(_mm_castsi128_pd(v_hi), magic_d_all); + // (v_hi - magic_d_all) + v_lo Do not assume associativity of floating point addition + __m128d result = _mm_add_pd(v_hi_dbl, _mm_castsi128_pd(v_lo)); + return v_float64x2(result); +} + +////////////// Lookup table access //////////////////// + +inline v_int8x16 v_lut(const schar* tab, const int* idx) +{ +#if defined(_MSC_VER) + return v_int8x16(_mm_setr_epi8(tab[idx[0]], tab[idx[1]], tab[idx[ 2]], tab[idx[ 3]], tab[idx[ 4]], tab[idx[ 5]], tab[idx[ 6]], tab[idx[ 7]], + tab[idx[8]], tab[idx[9]], tab[idx[10]], tab[idx[11]], tab[idx[12]], tab[idx[13]], tab[idx[14]], tab[idx[15]])); +#else + return v_int8x16(_mm_setr_epi64( + _mm_setr_pi8(tab[idx[0]], tab[idx[1]], tab[idx[ 2]], tab[idx[ 3]], tab[idx[ 4]], tab[idx[ 5]], tab[idx[ 6]], tab[idx[ 7]]), + _mm_setr_pi8(tab[idx[8]], tab[idx[9]], tab[idx[10]], tab[idx[11]], tab[idx[12]], tab[idx[13]], tab[idx[14]], tab[idx[15]]) + )); +#endif +} +inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx) +{ +#if defined(_MSC_VER) + return v_int8x16(_mm_setr_epi16(*(const short*)(tab + idx[0]), *(const short*)(tab + idx[1]), *(const short*)(tab + idx[2]), *(const short*)(tab + idx[3]), + *(const short*)(tab + idx[4]), *(const short*)(tab + idx[5]), *(const short*)(tab + idx[6]), *(const short*)(tab + idx[7]))); +#else + return v_int8x16(_mm_setr_epi64( + _mm_setr_pi16(*(const short*)(tab + idx[0]), *(const short*)(tab + idx[1]), *(const short*)(tab + idx[2]), *(const short*)(tab + idx[3])), + _mm_setr_pi16(*(const short*)(tab + idx[4]), *(const short*)(tab + idx[5]), *(const short*)(tab + idx[6]), *(const short*)(tab + idx[7])) + )); +#endif +} +inline v_int8x16 v_lut_quads(const schar* tab, const int* idx) +{ +#if defined(_MSC_VER) + return v_int8x16(_mm_setr_epi32(*(const int*)(tab + idx[0]), *(const int*)(tab + idx[1]), + *(const int*)(tab + idx[2]), *(const int*)(tab + idx[3]))); +#else + return v_int8x16(_mm_setr_epi64( + _mm_setr_pi32(*(const int*)(tab + idx[0]), *(const int*)(tab + idx[1])), + _mm_setr_pi32(*(const int*)(tab + idx[2]), *(const int*)(tab + idx[3])) + )); +#endif +} +inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((const schar *)tab, idx)); } +inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((const schar *)tab, idx)); } +inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((const schar *)tab, idx)); } + +inline v_int16x8 v_lut(const short* tab, const int* idx) +{ +#if defined(_MSC_VER) + return v_int16x8(_mm_setr_epi16(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]], + tab[idx[4]], tab[idx[5]], tab[idx[6]], tab[idx[7]])); +#else + return v_int16x8(_mm_setr_epi64( + _mm_setr_pi16(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]), + _mm_setr_pi16(tab[idx[4]], tab[idx[5]], tab[idx[6]], tab[idx[7]]) + )); +#endif +} +inline v_int16x8 v_lut_pairs(const short* tab, const int* idx) +{ +#if defined(_MSC_VER) + return v_int16x8(_mm_setr_epi32(*(const int*)(tab + idx[0]), *(const int*)(tab + idx[1]), + *(const int*)(tab + idx[2]), *(const int*)(tab + idx[3]))); +#else + return v_int16x8(_mm_setr_epi64( + _mm_setr_pi32(*(const int*)(tab + idx[0]), *(const int*)(tab + idx[1])), + _mm_setr_pi32(*(const int*)(tab + idx[2]), *(const int*)(tab + idx[3])) + )); +#endif +} +inline v_int16x8 v_lut_quads(const short* tab, const int* idx) +{ + return v_int16x8(_mm_set_epi64x(*(const int64_t*)(tab + idx[1]), *(const int64_t*)(tab + idx[0]))); +} +inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((const short *)tab, idx)); } +inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((const short *)tab, idx)); } +inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((const short *)tab, idx)); } + +inline v_int32x4 v_lut(const int* tab, const int* idx) +{ +#if defined(_MSC_VER) + return v_int32x4(_mm_setr_epi32(tab[idx[0]], tab[idx[1]], + tab[idx[2]], tab[idx[3]])); +#else + return v_int32x4(_mm_setr_epi64( + _mm_setr_pi32(tab[idx[0]], tab[idx[1]]), + _mm_setr_pi32(tab[idx[2]], tab[idx[3]]) + )); +#endif +} +inline v_int32x4 v_lut_pairs(const int* tab, const int* idx) +{ + return v_int32x4(_mm_set_epi64x(*(const int64_t*)(tab + idx[1]), *(const int64_t*)(tab + idx[0]))); +} +inline v_int32x4 v_lut_quads(const int* tab, const int* idx) +{ + return v_int32x4(_mm_loadu_si128((const __m128i*)(tab + idx[0]))); +} +inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((const int *)tab, idx)); } +inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((const int *)tab, idx)); } +inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((const int *)tab, idx)); } + +inline v_int64x2 v_lut(const int64_t* tab, const int* idx) +{ + return v_int64x2(_mm_set_epi64x(tab[idx[1]], tab[idx[0]])); +} +inline v_int64x2 v_lut_pairs(const int64_t* tab, const int* idx) +{ + return v_int64x2(_mm_loadu_si128((const __m128i*)(tab + idx[0]))); +} +inline v_uint64x2 v_lut(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); } +inline v_uint64x2 v_lut_pairs(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); } + +inline v_float32x4 v_lut(const float* tab, const int* idx) +{ + return v_float32x4(_mm_setr_ps(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]])); +} +inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) { return v_reinterpret_as_f32(v_lut_pairs((const int *)tab, idx)); } +inline v_float32x4 v_lut_quads(const float* tab, const int* idx) { return v_reinterpret_as_f32(v_lut_quads((const int *)tab, idx)); } + +inline v_float64x2 v_lut(const double* tab, const int* idx) +{ + return v_float64x2(_mm_setr_pd(tab[idx[0]], tab[idx[1]])); +} +inline v_float64x2 v_lut_pairs(const double* tab, const int* idx) { return v_float64x2(_mm_castsi128_pd(_mm_loadu_si128((const __m128i*)(tab + idx[0])))); } + +inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + return v_int32x4(_mm_setr_epi32(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]])); +} + +inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) +{ + return v_reinterpret_as_u32(v_lut((const int *)tab, idxvec)); +} + +inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + return v_float32x4(_mm_setr_ps(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]])); +} + +inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) +{ + int idx[2]; + v_store_low(idx, idxvec); + return v_float64x2(_mm_setr_pd(tab[idx[0]], tab[idx[1]])); +} + +// loads pairs from the table and deinterleaves them, e.g. returns: +// x = (tab[idxvec[0], tab[idxvec[1]], tab[idxvec[2]], tab[idxvec[3]]), +// y = (tab[idxvec[0]+1], tab[idxvec[1]+1], tab[idxvec[2]+1], tab[idxvec[3]+1]) +// note that the indices are float's indices, not the float-pair indices. +// in theory, this function can be used to implement bilinear interpolation, +// when idxvec are the offsets within the image. +inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + __m128 z = _mm_setzero_ps(); + __m128 xy01 = _mm_loadl_pi(z, (__m64*)(tab + idx[0])); + __m128 xy23 = _mm_loadl_pi(z, (__m64*)(tab + idx[2])); + xy01 = _mm_loadh_pi(xy01, (__m64*)(tab + idx[1])); + xy23 = _mm_loadh_pi(xy23, (__m64*)(tab + idx[3])); + __m128 xxyy02 = _mm_unpacklo_ps(xy01, xy23); + __m128 xxyy13 = _mm_unpackhi_ps(xy01, xy23); + x = v_float32x4(_mm_unpacklo_ps(xxyy02, xxyy13)); + y = v_float32x4(_mm_unpackhi_ps(xxyy02, xxyy13)); +} + +inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y) +{ + int idx[2]; + v_store_low(idx, idxvec); + __m128d xy0 = _mm_loadu_pd(tab + idx[0]); + __m128d xy1 = _mm_loadu_pd(tab + idx[1]); + x = v_float64x2(_mm_unpacklo_pd(xy0, xy1)); + y = v_float64x2(_mm_unpackhi_pd(xy0, xy1)); +} + +inline v_int8x16 v_interleave_pairs(const v_int8x16& vec) +{ +#if CV_SSSE3 + return v_int8x16(_mm_shuffle_epi8(vec.val, _mm_set_epi64x(0x0f0d0e0c0b090a08, 0x0705060403010200))); +#else + __m128i a = _mm_shufflelo_epi16(vec.val, _MM_SHUFFLE(3, 1, 2, 0)); + a = _mm_shufflehi_epi16(a, _MM_SHUFFLE(3, 1, 2, 0)); + a = _mm_shuffle_epi32(a, _MM_SHUFFLE(3, 1, 2, 0)); + return v_int8x16(_mm_unpacklo_epi8(a, _mm_unpackhi_epi64(a, a))); +#endif +} +inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); } +inline v_int8x16 v_interleave_quads(const v_int8x16& vec) +{ +#if CV_SSSE3 + return v_int8x16(_mm_shuffle_epi8(vec.val, _mm_set_epi64x(0x0f0b0e0a0d090c08, 0x0703060205010400))); +#else + __m128i a = _mm_shuffle_epi32(vec.val, _MM_SHUFFLE(3, 1, 2, 0)); + return v_int8x16(_mm_unpacklo_epi8(a, _mm_unpackhi_epi64(a, a))); +#endif +} +inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_interleave_pairs(const v_int16x8& vec) +{ +#if CV_SSSE3 + return v_int16x8(_mm_shuffle_epi8(vec.val, _mm_set_epi64x(0x0f0e0b0a0d0c0908, 0x0706030205040100))); +#else + __m128i a = _mm_shufflelo_epi16(vec.val, _MM_SHUFFLE(3, 1, 2, 0)); + return v_int16x8(_mm_shufflehi_epi16(a, _MM_SHUFFLE(3, 1, 2, 0))); +#endif +} +inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); } +inline v_int16x8 v_interleave_quads(const v_int16x8& vec) +{ +#if CV_SSSE3 + return v_int16x8(_mm_shuffle_epi8(vec.val, _mm_set_epi64x(0x0f0e07060d0c0504, 0x0b0a030209080100))); +#else + return v_int16x8(_mm_unpacklo_epi16(vec.val, _mm_unpackhi_epi64(vec.val, vec.val))); +#endif +} +inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); } + +inline v_int32x4 v_interleave_pairs(const v_int32x4& vec) +{ + return v_int32x4(_mm_shuffle_epi32(vec.val, _MM_SHUFFLE(3, 1, 2, 0))); +} +inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } +inline v_float32x4 v_interleave_pairs(const v_float32x4& vec) { return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } + +inline v_int8x16 v_pack_triplets(const v_int8x16& vec) +{ +#if CV_SSSE3 + return v_int8x16(_mm_shuffle_epi8(vec.val, _mm_set_epi64x(0xffffff0f0e0d0c0a, 0x0908060504020100))); +#else + __m128i mask = _mm_set1_epi64x(0x00000000FFFFFFFF); + __m128i a = _mm_srli_si128(_mm_or_si128(_mm_andnot_si128(mask, vec.val), _mm_and_si128(mask, _mm_sll_epi32(vec.val, _mm_set_epi64x(0, 8)))), 1); + return v_int8x16(_mm_srli_si128(_mm_shufflelo_epi16(a, _MM_SHUFFLE(2, 1, 0, 3)), 2)); +#endif +} +inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_pack_triplets(const v_int16x8& vec) +{ +#if CV_SSSE3 + return v_int16x8(_mm_shuffle_epi8(vec.val, _mm_set_epi64x(0xffff0f0e0d0c0b0a, 0x0908050403020100))); +#else + return v_int16x8(_mm_srli_si128(_mm_shufflelo_epi16(vec.val, _MM_SHUFFLE(2, 1, 0, 3)), 2)); +#endif +} +inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); } + +inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; } +inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; } +inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; } + +template +inline uchar v_extract_n(const v_uint8x16& v) +{ +#if CV_SSE4_1 + return (uchar)_mm_extract_epi8(v.val, i); +#else + return v_rotate_right(v).get0(); +#endif +} + +template +inline schar v_extract_n(const v_int8x16& v) +{ + return (schar)v_extract_n(v_reinterpret_as_u8(v)); +} + +template +inline ushort v_extract_n(const v_uint16x8& v) +{ + return (ushort)_mm_extract_epi16(v.val, i); +} + +template +inline short v_extract_n(const v_int16x8& v) +{ + return (short)v_extract_n(v_reinterpret_as_u16(v)); +} + +template +inline uint v_extract_n(const v_uint32x4& v) +{ +#if CV_SSE4_1 + return (uint)_mm_extract_epi32(v.val, i); +#else + return v_rotate_right(v).get0(); +#endif +} + +template +inline int v_extract_n(const v_int32x4& v) +{ + return (int)v_extract_n(v_reinterpret_as_u32(v)); +} + +template +inline uint64 v_extract_n(const v_uint64x2& v) +{ +#ifdef CV__SIMD_NATIVE_mm_extract_epi64 + return (uint64)_v128_extract_epi64(v.val); +#else + return v_rotate_right(v).get0(); +#endif +} + +template +inline int64 v_extract_n(const v_int64x2& v) +{ + return (int64)v_extract_n(v_reinterpret_as_u64(v)); +} + +template +inline float v_extract_n(const v_float32x4& v) +{ + union { uint iv; float fv; } d; + d.iv = v_extract_n(v_reinterpret_as_u32(v)); + return d.fv; +} + +template +inline double v_extract_n(const v_float64x2& v) +{ + union { uint64 iv; double dv; } d; + d.iv = v_extract_n(v_reinterpret_as_u64(v)); + return d.dv; +} + +template +inline v_int32x4 v_broadcast_element(const v_int32x4& v) +{ + return v_int32x4(_mm_shuffle_epi32(v.val, _MM_SHUFFLE(i,i,i,i))); +} + +template +inline v_uint32x4 v_broadcast_element(const v_uint32x4& v) +{ + return v_uint32x4(_mm_shuffle_epi32(v.val, _MM_SHUFFLE(i,i,i,i))); +} + +template +inline v_float32x4 v_broadcast_element(const v_float32x4& v) +{ + return v_float32x4(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE((char)i,(char)i,(char)i,(char)i))); +} + +////////////// FP16 support /////////////////////////// + +inline v_float32x4 v_load_expand(const float16_t* ptr) +{ +#if CV_FP16 + return v_float32x4(_mm_cvtph_ps(_mm_loadu_si128((const __m128i*)ptr))); +#else + const __m128i z = _mm_setzero_si128(), delta = _mm_set1_epi32(0x38000000); + const __m128i signmask = _mm_set1_epi32(0x80000000), maxexp = _mm_set1_epi32(0x7c000000); + const __m128 deltaf = _mm_castsi128_ps(_mm_set1_epi32(0x38800000)); + __m128i bits = _mm_unpacklo_epi16(z, _mm_loadl_epi64((const __m128i*)ptr)); // h << 16 + __m128i e = _mm_and_si128(bits, maxexp), sign = _mm_and_si128(bits, signmask); + __m128i t = _mm_add_epi32(_mm_srli_epi32(_mm_xor_si128(bits, sign), 3), delta); // ((h & 0x7fff) << 13) + delta + __m128i zt = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_add_epi32(t, _mm_set1_epi32(1 << 23))), deltaf)); + + t = _mm_add_epi32(t, _mm_and_si128(delta, _mm_cmpeq_epi32(maxexp, e))); + __m128i zmask = _mm_cmpeq_epi32(e, z); + __m128i ft = v_select_si128(zmask, zt, t); + return v_float32x4(_mm_castsi128_ps(_mm_or_si128(ft, sign))); +#endif +} + +inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +{ +#if CV_FP16 + __m128i fp16_value = _mm_cvtps_ph(v.val, 0); + _mm_storel_epi64((__m128i*)ptr, fp16_value); +#else + const __m128i signmask = _mm_set1_epi32(0x80000000); + const __m128i rval = _mm_set1_epi32(0x3f000000); + + __m128i t = _mm_castps_si128(v.val); + __m128i sign = _mm_srai_epi32(_mm_and_si128(t, signmask), 16); + t = _mm_andnot_si128(signmask, t); + + __m128i finitemask = _mm_cmpgt_epi32(_mm_set1_epi32(0x47800000), t); + __m128i isnan = _mm_cmpgt_epi32(t, _mm_set1_epi32(0x7f800000)); + __m128i naninf = v_select_si128(isnan, _mm_set1_epi32(0x7e00), _mm_set1_epi32(0x7c00)); + __m128i tinymask = _mm_cmpgt_epi32(_mm_set1_epi32(0x38800000), t); + __m128i tt = _mm_castps_si128(_mm_add_ps(_mm_castsi128_ps(t), _mm_castsi128_ps(rval))); + tt = _mm_sub_epi32(tt, rval); + __m128i odd = _mm_and_si128(_mm_srli_epi32(t, 13), _mm_set1_epi32(1)); + __m128i nt = _mm_add_epi32(t, _mm_set1_epi32(0xc8000fff)); + nt = _mm_srli_epi32(_mm_add_epi32(nt, odd), 13); + t = v_select_si128(tinymask, tt, nt); + t = v_select_si128(finitemask, t, naninf); + t = _mm_or_si128(t, sign); + t = _mm_packs_epi32(t, t); + _mm_storel_epi64((__m128i*)ptr, t); +#endif +} + +inline void v_cleanup() {} + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END + //! @endcond } diff --git a/IPL/include/opencv/opencv2/core/hal/intrin_sse_em.hpp b/IPL/include/opencv/opencv2/core/hal/intrin_sse_em.hpp new file mode 100644 index 0000000..6fb0881 --- /dev/null +++ b/IPL/include/opencv/opencv2/core/hal/intrin_sse_em.hpp @@ -0,0 +1,180 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#ifndef OPENCV_HAL_INTRIN_SSE_EM_HPP +#define OPENCV_HAL_INTRIN_SSE_EM_HPP + +namespace cv +{ + +//! @cond IGNORED + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + +#define OPENCV_HAL_SSE_WRAP_1(fun, tp) \ + inline tp _v128_##fun(const tp& a) \ + { return _mm_##fun(a); } + +#define OPENCV_HAL_SSE_WRAP_2(fun, tp) \ + inline tp _v128_##fun(const tp& a, const tp& b) \ + { return _mm_##fun(a, b); } + +#define OPENCV_HAL_SSE_WRAP_3(fun, tp) \ + inline tp _v128_##fun(const tp& a, const tp& b, const tp& c) \ + { return _mm_##fun(a, b, c); } + +///////////////////////////// XOP ///////////////////////////// + +// [todo] define CV_XOP +#if 1 // CV_XOP +inline __m128i _v128_comgt_epu32(const __m128i& a, const __m128i& b) +{ + const __m128i delta = _mm_set1_epi32((int)0x80000000); + return _mm_cmpgt_epi32(_mm_xor_si128(a, delta), _mm_xor_si128(b, delta)); +} +// wrapping XOP +#else +OPENCV_HAL_SSE_WRAP_2(_v128_comgt_epu32, __m128i) +#endif // !CV_XOP + +///////////////////////////// SSE4.1 ///////////////////////////// + +#if !CV_SSE4_1 + +/** Swizzle **/ +inline __m128i _v128_blendv_epi8(const __m128i& a, const __m128i& b, const __m128i& mask) +{ return _mm_xor_si128(a, _mm_and_si128(_mm_xor_si128(b, a), mask)); } + +/** Convert **/ +// 8 >> 16 +inline __m128i _v128_cvtepu8_epi16(const __m128i& a) +{ + const __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi8(a, z); +} +inline __m128i _v128_cvtepi8_epi16(const __m128i& a) +{ return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); } +// 8 >> 32 +inline __m128i _v128_cvtepu8_epi32(const __m128i& a) +{ + const __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z); +} +inline __m128i _v128_cvtepi8_epi32(const __m128i& a) +{ + __m128i r = _mm_unpacklo_epi8(a, a); + r = _mm_unpacklo_epi8(r, r); + return _mm_srai_epi32(r, 24); +} +// 16 >> 32 +inline __m128i _v128_cvtepu16_epi32(const __m128i& a) +{ + const __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi16(a, z); +} +inline __m128i _v128_cvtepi16_epi32(const __m128i& a) +{ return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); } +// 32 >> 64 +inline __m128i _v128_cvtepu32_epi64(const __m128i& a) +{ + const __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi32(a, z); +} +inline __m128i _v128_cvtepi32_epi64(const __m128i& a) +{ return _mm_unpacklo_epi32(a, _mm_srai_epi32(a, 31)); } + +/** Arithmetic **/ +inline __m128i _v128_mullo_epi32(const __m128i& a, const __m128i& b) +{ + __m128i c0 = _mm_mul_epu32(a, b); + __m128i c1 = _mm_mul_epu32(_mm_srli_epi64(a, 32), _mm_srli_epi64(b, 32)); + __m128i d0 = _mm_unpacklo_epi32(c0, c1); + __m128i d1 = _mm_unpackhi_epi32(c0, c1); + return _mm_unpacklo_epi64(d0, d1); +} + +/** Math **/ +inline __m128i _v128_min_epu32(const __m128i& a, const __m128i& b) +{ return _v128_blendv_epi8(a, b, _v128_comgt_epu32(a, b)); } + +// wrapping SSE4.1 +#else +OPENCV_HAL_SSE_WRAP_1(cvtepu8_epi16, __m128i) +OPENCV_HAL_SSE_WRAP_1(cvtepi8_epi16, __m128i) +OPENCV_HAL_SSE_WRAP_1(cvtepu8_epi32, __m128i) +OPENCV_HAL_SSE_WRAP_1(cvtepi8_epi32, __m128i) +OPENCV_HAL_SSE_WRAP_1(cvtepu16_epi32, __m128i) +OPENCV_HAL_SSE_WRAP_1(cvtepi16_epi32, __m128i) +OPENCV_HAL_SSE_WRAP_1(cvtepu32_epi64, __m128i) +OPENCV_HAL_SSE_WRAP_1(cvtepi32_epi64, __m128i) +OPENCV_HAL_SSE_WRAP_2(min_epu32, __m128i) +OPENCV_HAL_SSE_WRAP_2(mullo_epi32, __m128i) +OPENCV_HAL_SSE_WRAP_3(blendv_epi8, __m128i) +#endif // !CV_SSE4_1 + +///////////////////////////// Revolutionary ///////////////////////////// + +/** Convert **/ +// 16 << 8 +inline __m128i _v128_cvtepu8_epi16_high(const __m128i& a) +{ + const __m128i z = _mm_setzero_si128(); + return _mm_unpackhi_epi8(a, z); +} +inline __m128i _v128_cvtepi8_epi16_high(const __m128i& a) +{ return _mm_srai_epi16(_mm_unpackhi_epi8(a, a), 8); } +// 32 << 16 +inline __m128i _v128_cvtepu16_epi32_high(const __m128i& a) +{ + const __m128i z = _mm_setzero_si128(); + return _mm_unpackhi_epi16(a, z); +} +inline __m128i _v128_cvtepi16_epi32_high(const __m128i& a) +{ return _mm_srai_epi32(_mm_unpackhi_epi16(a, a), 16); } +// 64 << 32 +inline __m128i _v128_cvtepu32_epi64_high(const __m128i& a) +{ + const __m128i z = _mm_setzero_si128(); + return _mm_unpackhi_epi32(a, z); +} +inline __m128i _v128_cvtepi32_epi64_high(const __m128i& a) +{ return _mm_unpackhi_epi32(a, _mm_srai_epi32(a, 31)); } + +/** Miscellaneous **/ +inline __m128i _v128_packs_epu32(const __m128i& a, const __m128i& b) +{ + const __m128i m = _mm_set1_epi32(65535); + __m128i am = _v128_min_epu32(a, m); + __m128i bm = _v128_min_epu32(b, m); +#if CV_SSE4_1 + return _mm_packus_epi32(am, bm); +#else + const __m128i d = _mm_set1_epi32(32768), nd = _mm_set1_epi16(-32768); + am = _mm_sub_epi32(am, d); + bm = _mm_sub_epi32(bm, d); + am = _mm_packs_epi32(am, bm); + return _mm_sub_epi16(am, nd); +#endif +} + +template +inline int64 _v128_extract_epi64(const __m128i& a) +{ +#if defined(CV__SIMD_HAVE_mm_extract_epi64) || (CV_SSE4_1 && (defined(__x86_64__)/*GCC*/ || defined(_M_X64)/*MSVC*/)) +#define CV__SIMD_NATIVE_mm_extract_epi64 1 + return _mm_extract_epi64(a, i); +#else + CV_DECL_ALIGNED(16) int64 tmp[2]; + _mm_store_si128((__m128i*)tmp, a); + return tmp[i]; +#endif +} + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END + +//! @endcond + +} // cv:: + +#endif // OPENCV_HAL_INTRIN_SSE_EM_HPP diff --git a/IPL/include/opencv/opencv2/core/hal/intrin_vsx.hpp b/IPL/include/opencv/opencv2/core/hal/intrin_vsx.hpp new file mode 100644 index 0000000..e0f6cbf --- /dev/null +++ b/IPL/include/opencv/opencv2/core/hal/intrin_vsx.hpp @@ -0,0 +1,1578 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#ifndef OPENCV_HAL_VSX_HPP +#define OPENCV_HAL_VSX_HPP + +#include +#include "opencv2/core/utility.hpp" + +#define CV_SIMD128 1 +#define CV_SIMD128_64F 1 + +namespace cv +{ + +//! @cond IGNORED + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + +///////// Types //////////// + +struct v_uint8x16 +{ + typedef uchar lane_type; + enum { nlanes = 16 }; + vec_uchar16 val; + + explicit v_uint8x16(const vec_uchar16& v) : val(v) + {} + v_uint8x16() : val(vec_uchar16_z) + {} + v_uint8x16(vec_bchar16 v) : val(vec_uchar16_c(v)) + {} + v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7, + uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15) + : val(vec_uchar16_set(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15)) + {} + uchar get0() const + { return vec_extract(val, 0); } +}; + +struct v_int8x16 +{ + typedef schar lane_type; + enum { nlanes = 16 }; + vec_char16 val; + + explicit v_int8x16(const vec_char16& v) : val(v) + {} + v_int8x16() : val(vec_char16_z) + {} + v_int8x16(vec_bchar16 v) : val(vec_char16_c(v)) + {} + v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7, + schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15) + : val(vec_char16_set(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15)) + {} + schar get0() const + { return vec_extract(val, 0); } +}; + +struct v_uint16x8 +{ + typedef ushort lane_type; + enum { nlanes = 8 }; + vec_ushort8 val; + + explicit v_uint16x8(const vec_ushort8& v) : val(v) + {} + v_uint16x8() : val(vec_ushort8_z) + {} + v_uint16x8(vec_bshort8 v) : val(vec_ushort8_c(v)) + {} + v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7) + : val(vec_ushort8_set(v0, v1, v2, v3, v4, v5, v6, v7)) + {} + ushort get0() const + { return vec_extract(val, 0); } +}; + +struct v_int16x8 +{ + typedef short lane_type; + enum { nlanes = 8 }; + vec_short8 val; + + explicit v_int16x8(const vec_short8& v) : val(v) + {} + v_int16x8() : val(vec_short8_z) + {} + v_int16x8(vec_bshort8 v) : val(vec_short8_c(v)) + {} + v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7) + : val(vec_short8_set(v0, v1, v2, v3, v4, v5, v6, v7)) + {} + short get0() const + { return vec_extract(val, 0); } +}; + +struct v_uint32x4 +{ + typedef unsigned lane_type; + enum { nlanes = 4 }; + vec_uint4 val; + + explicit v_uint32x4(const vec_uint4& v) : val(v) + {} + v_uint32x4() : val(vec_uint4_z) + {} + v_uint32x4(vec_bint4 v) : val(vec_uint4_c(v)) + {} + v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) : val(vec_uint4_set(v0, v1, v2, v3)) + {} + uint get0() const + { return vec_extract(val, 0); } +}; + +struct v_int32x4 +{ + typedef int lane_type; + enum { nlanes = 4 }; + vec_int4 val; + + explicit v_int32x4(const vec_int4& v) : val(v) + {} + v_int32x4() : val(vec_int4_z) + {} + v_int32x4(vec_bint4 v) : val(vec_int4_c(v)) + {} + v_int32x4(int v0, int v1, int v2, int v3) : val(vec_int4_set(v0, v1, v2, v3)) + {} + int get0() const + { return vec_extract(val, 0); } +}; + +struct v_float32x4 +{ + typedef float lane_type; + enum { nlanes = 4 }; + vec_float4 val; + + explicit v_float32x4(const vec_float4& v) : val(v) + {} + v_float32x4() : val(vec_float4_z) + {} + v_float32x4(vec_bint4 v) : val(vec_float4_c(v)) + {} + v_float32x4(float v0, float v1, float v2, float v3) : val(vec_float4_set(v0, v1, v2, v3)) + {} + float get0() const + { return vec_extract(val, 0); } +}; + +struct v_uint64x2 +{ + typedef uint64 lane_type; + enum { nlanes = 2 }; + vec_udword2 val; + + explicit v_uint64x2(const vec_udword2& v) : val(v) + {} + v_uint64x2() : val(vec_udword2_z) + {} + v_uint64x2(vec_bdword2 v) : val(vec_udword2_c(v)) + {} + v_uint64x2(uint64 v0, uint64 v1) : val(vec_udword2_set(v0, v1)) + {} + uint64 get0() const + { return vec_extract(val, 0); } +}; + +struct v_int64x2 +{ + typedef int64 lane_type; + enum { nlanes = 2 }; + vec_dword2 val; + + explicit v_int64x2(const vec_dword2& v) : val(v) + {} + v_int64x2() : val(vec_dword2_z) + {} + v_int64x2(vec_bdword2 v) : val(vec_dword2_c(v)) + {} + v_int64x2(int64 v0, int64 v1) : val(vec_dword2_set(v0, v1)) + {} + int64 get0() const + { return vec_extract(val, 0); } +}; + +struct v_float64x2 +{ + typedef double lane_type; + enum { nlanes = 2 }; + vec_double2 val; + + explicit v_float64x2(const vec_double2& v) : val(v) + {} + v_float64x2() : val(vec_double2_z) + {} + v_float64x2(vec_bdword2 v) : val(vec_double2_c(v)) + {} + v_float64x2(double v0, double v1) : val(vec_double2_set(v0, v1)) + {} + double get0() const + { return vec_extract(val, 0); } +}; + +#define OPENCV_HAL_IMPL_VSX_EXTRACT_N(_Tpvec, _Tp) \ +template inline _Tp v_extract_n(VSX_UNUSED(_Tpvec v)) { return vec_extract(v.val, i); } + +OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_uint8x16, uchar) +OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_int8x16, schar) +OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_uint16x8, ushort) +OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_int16x8, short) +OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_uint32x4, uint) +OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_int32x4, int) +OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_uint64x2, uint64) +OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_int64x2, int64) +OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_float32x4, float) +OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_float64x2, double) + +//////////////// Load and store operations /////////////// + +/* + * clang-5 aborted during parse "vec_xxx_c" only if it's + * inside a function template which is defined by preprocessor macro. + * + * if vec_xxx_c defined as C++ cast, clang-5 will pass it +*/ +#define OPENCV_HAL_IMPL_VSX_INITVEC(_Tpvec, _Tp, suffix, cast) \ +inline _Tpvec v_setzero_##suffix() { return _Tpvec(); } \ +inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(vec_splats((_Tp)v));} \ +template inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0 &a) \ +{ return _Tpvec((cast)a.val); } + +OPENCV_HAL_IMPL_VSX_INITVEC(v_uint8x16, uchar, u8, vec_uchar16) +OPENCV_HAL_IMPL_VSX_INITVEC(v_int8x16, schar, s8, vec_char16) +OPENCV_HAL_IMPL_VSX_INITVEC(v_uint16x8, ushort, u16, vec_ushort8) +OPENCV_HAL_IMPL_VSX_INITVEC(v_int16x8, short, s16, vec_short8) +OPENCV_HAL_IMPL_VSX_INITVEC(v_uint32x4, uint, u32, vec_uint4) +OPENCV_HAL_IMPL_VSX_INITVEC(v_int32x4, int, s32, vec_int4) +OPENCV_HAL_IMPL_VSX_INITVEC(v_uint64x2, uint64, u64, vec_udword2) +OPENCV_HAL_IMPL_VSX_INITVEC(v_int64x2, int64, s64, vec_dword2) +OPENCV_HAL_IMPL_VSX_INITVEC(v_float32x4, float, f32, vec_float4) +OPENCV_HAL_IMPL_VSX_INITVEC(v_float64x2, double, f64, vec_double2) + +#define OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, ld, ld_a, st, st_a) \ +inline _Tpvec v_load(const _Tp* ptr) \ +{ return _Tpvec(ld(0, ptr)); } \ +inline _Tpvec v_load_aligned(VSX_UNUSED(const _Tp* ptr)) \ +{ return _Tpvec(ld_a(0, ptr)); } \ +inline _Tpvec v_load_low(const _Tp* ptr) \ +{ return _Tpvec(vec_ld_l8(ptr)); } \ +inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ +{ return _Tpvec(vec_mergesqh(vec_ld_l8(ptr0), vec_ld_l8(ptr1))); } \ +inline void v_store(_Tp* ptr, const _Tpvec& a) \ +{ st(a.val, 0, ptr); } \ +inline void v_store_aligned(VSX_UNUSED(_Tp* ptr), const _Tpvec& a) \ +{ st_a(a.val, 0, ptr); } \ +inline void v_store_aligned_nocache(VSX_UNUSED(_Tp* ptr), const _Tpvec& a) \ +{ st_a(a.val, 0, ptr); } \ +inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \ +{ if(mode == hal::STORE_UNALIGNED) st(a.val, 0, ptr); else st_a(a.val, 0, ptr); } \ +inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ +{ vec_st_l8(a.val, ptr); } \ +inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ +{ vec_st_h8(a.val, ptr); } + +// working around gcc bug for aligned ld/st +// if runtime check for vec_ld/st fail we failback to unaligned ld/st +// https://github.com/opencv/opencv/issues/13211 +#ifdef CV_COMPILER_VSX_BROKEN_ALIGNED + #define OPENCV_HAL_IMPL_VSX_LOADSTORE(_Tpvec, _Tp) \ + OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, vsx_ld, vsx_ld, vsx_st, vsx_st) +#else + #define OPENCV_HAL_IMPL_VSX_LOADSTORE(_Tpvec, _Tp) \ + OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, vsx_ld, vec_ld, vsx_st, vec_st) +#endif + +OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint8x16, uchar) +OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int8x16, schar) +OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint16x8, ushort) +OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int16x8, short) +OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint32x4, uint) +OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int32x4, int) +OPENCV_HAL_IMPL_VSX_LOADSTORE(v_float32x4, float) + +OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_float64x2, double, vsx_ld, vsx_ld, vsx_st, vsx_st) +OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_uint64x2, uint64, vsx_ld2, vsx_ld2, vsx_st2, vsx_st2) +OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_int64x2, int64, vsx_ld2, vsx_ld2, vsx_st2, vsx_st2) + +//////////////// Value reordering /////////////// + +/* de&interleave */ +#define OPENCV_HAL_IMPL_VSX_INTERLEAVE(_Tp, _Tpvec) \ +inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, _Tpvec& b) \ +{ vec_ld_deinterleave(ptr, a.val, b.val);} \ +inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, \ + _Tpvec& b, _Tpvec& c) \ +{ vec_ld_deinterleave(ptr, a.val, b.val, c.val); } \ +inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, _Tpvec& b, \ + _Tpvec& c, _Tpvec& d) \ +{ vec_ld_deinterleave(ptr, a.val, b.val, c.val, d.val); } \ +inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ vec_st_interleave(a.val, b.val, ptr); } \ +inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, \ + const _Tpvec& b, const _Tpvec& c, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ vec_st_interleave(a.val, b.val, c.val, ptr); } \ +inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, \ + const _Tpvec& c, const _Tpvec& d, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ vec_st_interleave(a.val, b.val, c.val, d.val, ptr); } + +OPENCV_HAL_IMPL_VSX_INTERLEAVE(uchar, v_uint8x16) +OPENCV_HAL_IMPL_VSX_INTERLEAVE(schar, v_int8x16) +OPENCV_HAL_IMPL_VSX_INTERLEAVE(ushort, v_uint16x8) +OPENCV_HAL_IMPL_VSX_INTERLEAVE(short, v_int16x8) +OPENCV_HAL_IMPL_VSX_INTERLEAVE(uint, v_uint32x4) +OPENCV_HAL_IMPL_VSX_INTERLEAVE(int, v_int32x4) +OPENCV_HAL_IMPL_VSX_INTERLEAVE(float, v_float32x4) +OPENCV_HAL_IMPL_VSX_INTERLEAVE(double, v_float64x2) +OPENCV_HAL_IMPL_VSX_INTERLEAVE(int64, v_int64x2) +OPENCV_HAL_IMPL_VSX_INTERLEAVE(uint64, v_uint64x2) + +/* Expand */ +#define OPENCV_HAL_IMPL_VSX_EXPAND(_Tpvec, _Tpwvec, _Tp, fl, fh) \ +inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ +{ \ + b0.val = fh(a.val); \ + b1.val = fl(a.val); \ +} \ +inline _Tpwvec v_expand_low(const _Tpvec& a) \ +{ return _Tpwvec(fh(a.val)); } \ +inline _Tpwvec v_expand_high(const _Tpvec& a) \ +{ return _Tpwvec(fl(a.val)); } \ +inline _Tpwvec v_load_expand(const _Tp* ptr) \ +{ return _Tpwvec(fh(vec_ld_l8(ptr))); } + +OPENCV_HAL_IMPL_VSX_EXPAND(v_uint8x16, v_uint16x8, uchar, vec_unpacklu, vec_unpackhu) +OPENCV_HAL_IMPL_VSX_EXPAND(v_int8x16, v_int16x8, schar, vec_unpackl, vec_unpackh) +OPENCV_HAL_IMPL_VSX_EXPAND(v_uint16x8, v_uint32x4, ushort, vec_unpacklu, vec_unpackhu) +OPENCV_HAL_IMPL_VSX_EXPAND(v_int16x8, v_int32x4, short, vec_unpackl, vec_unpackh) +OPENCV_HAL_IMPL_VSX_EXPAND(v_uint32x4, v_uint64x2, uint, vec_unpacklu, vec_unpackhu) +OPENCV_HAL_IMPL_VSX_EXPAND(v_int32x4, v_int64x2, int, vec_unpackl, vec_unpackh) + +/* Load and zero expand a 4 byte value into the second dword, first is don't care. */ +#if !defined(CV_COMPILER_VSX_BROKEN_ASM) + #define _LXSIWZX(out, ptr, T) __asm__ ("lxsiwzx %x0, 0, %1\r\n" : "=wa"(out) : "r" (ptr) : "memory"); +#else + /* This is compiler-agnostic, but will introduce an unneeded splat on the critical path. */ + #define _LXSIWZX(out, ptr, T) out = (T)vec_udword2_sp(*(uint32_t*)(ptr)); +#endif + +inline v_uint32x4 v_load_expand_q(const uchar* ptr) +{ + // Zero-extend the extra 24B instead of unpacking. Usually faster in small kernel + // Likewise note, value is zero extended and upper 4 bytes are zero'ed. + vec_uchar16 pmu = {8, 12, 12, 12, 9, 12, 12, 12, 10, 12, 12, 12, 11, 12, 12, 12}; + vec_uchar16 out; + + _LXSIWZX(out, ptr, vec_uchar16); + out = vec_perm(out, out, pmu); + return v_uint32x4((vec_uint4)out); +} + +inline v_int32x4 v_load_expand_q(const schar* ptr) +{ + vec_char16 out; + vec_short8 outs; + vec_int4 outw; + + _LXSIWZX(out, ptr, vec_char16); + outs = vec_unpackl(out); + outw = vec_unpackh(outs); + return v_int32x4(outw); +} + +/* pack */ +#define OPENCV_HAL_IMPL_VSX_PACK(_Tpvec, _Tp, _Tpwvec, _Tpvn, _Tpdel, sfnc, pkfnc, addfnc, pack) \ +inline _Tpvec v_##pack(const _Tpwvec& a, const _Tpwvec& b) \ +{ \ + return _Tpvec(pkfnc(a.val, b.val)); \ +} \ +inline void v_##pack##_store(_Tp* ptr, const _Tpwvec& a) \ +{ \ + vec_st_l8(pkfnc(a.val, a.val), ptr); \ +} \ +template \ +inline _Tpvec v_rshr_##pack(const _Tpwvec& a, const _Tpwvec& b) \ +{ \ + const __vector _Tpvn vn = vec_splats((_Tpvn)n); \ + const __vector _Tpdel delta = vec_splats((_Tpdel)((_Tpdel)1 << (n-1))); \ + return _Tpvec(pkfnc(sfnc(addfnc(a.val, delta), vn), sfnc(addfnc(b.val, delta), vn))); \ +} \ +template \ +inline void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a) \ +{ \ + const __vector _Tpvn vn = vec_splats((_Tpvn)n); \ + const __vector _Tpdel delta = vec_splats((_Tpdel)((_Tpdel)1 << (n-1))); \ + vec_st_l8(pkfnc(sfnc(addfnc(a.val, delta), vn), delta), ptr); \ +} + +OPENCV_HAL_IMPL_VSX_PACK(v_uint8x16, uchar, v_uint16x8, unsigned short, unsigned short, + vec_sr, vec_packs, vec_adds, pack) +OPENCV_HAL_IMPL_VSX_PACK(v_int8x16, schar, v_int16x8, unsigned short, short, + vec_sra, vec_packs, vec_adds, pack) + +OPENCV_HAL_IMPL_VSX_PACK(v_uint16x8, ushort, v_uint32x4, unsigned int, unsigned int, + vec_sr, vec_packs, vec_add, pack) +OPENCV_HAL_IMPL_VSX_PACK(v_int16x8, short, v_int32x4, unsigned int, int, + vec_sra, vec_packs, vec_add, pack) + +OPENCV_HAL_IMPL_VSX_PACK(v_uint32x4, uint, v_uint64x2, unsigned long long, unsigned long long, + vec_sr, vec_pack, vec_add, pack) +OPENCV_HAL_IMPL_VSX_PACK(v_int32x4, int, v_int64x2, unsigned long long, long long, + vec_sra, vec_pack, vec_add, pack) + +OPENCV_HAL_IMPL_VSX_PACK(v_uint8x16, uchar, v_int16x8, unsigned short, short, + vec_sra, vec_packsu, vec_adds, pack_u) +OPENCV_HAL_IMPL_VSX_PACK(v_uint16x8, ushort, v_int32x4, unsigned int, int, + vec_sra, vec_packsu, vec_add, pack_u) +// Following variant is not implemented on other platforms: +//OPENCV_HAL_IMPL_VSX_PACK(v_uint32x4, uint, v_int64x2, unsigned long long, long long, +// vec_sra, vec_packsu, vec_add, pack_u) + +// pack boolean +inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) +{ + vec_uchar16 ab = vec_pack(a.val, b.val); + return v_uint8x16(ab); +} + +inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, const v_uint32x4& d) +{ + vec_ushort8 ab = vec_pack(a.val, b.val); + vec_ushort8 cd = vec_pack(c.val, d.val); + return v_uint8x16(vec_pack(ab, cd)); +} + +inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, + const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f, + const v_uint64x2& g, const v_uint64x2& h) +{ + vec_uint4 ab = vec_pack(a.val, b.val); + vec_uint4 cd = vec_pack(c.val, d.val); + vec_uint4 ef = vec_pack(e.val, f.val); + vec_uint4 gh = vec_pack(g.val, h.val); + + vec_ushort8 abcd = vec_pack(ab, cd); + vec_ushort8 efgh = vec_pack(ef, gh); + return v_uint8x16(vec_pack(abcd, efgh)); +} + +/* Recombine */ +template +inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1) +{ + b0.val = vec_mergeh(a0.val, a1.val); + b1.val = vec_mergel(a0.val, a1.val); +} + +template +inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) +{ return _Tpvec(vec_mergesql(a.val, b.val)); } + +template +inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) +{ return _Tpvec(vec_mergesqh(a.val, b.val)); } + +template +inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d) +{ + c.val = vec_mergesqh(a.val, b.val); + d.val = vec_mergesql(a.val, b.val); +} + +////////// Arithmetic, bitwise and comparison operations ///////// + +/* Element-wise binary and unary operations */ +/** Arithmetics **/ +#define OPENCV_HAL_IMPL_VSX_BIN_OP(bin_op, _Tpvec, intrin) \ +inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(intrin(a.val, b.val)); } \ +inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \ +{ a.val = intrin(a.val, b.val); return a; } + +OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint8x16, vec_adds) +OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint8x16, vec_subs) +OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int8x16, vec_adds) +OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int8x16, vec_subs) +OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint16x8, vec_adds) +OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint16x8, vec_subs) +OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int16x8, vec_adds) +OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int16x8, vec_subs) +OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint32x4, vec_add) +OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint32x4, vec_sub) +OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_uint32x4, vec_mul) +OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int32x4, vec_add) +OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int32x4, vec_sub) +OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_int32x4, vec_mul) +OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_float32x4, vec_add) +OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_float32x4, vec_sub) +OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_float32x4, vec_mul) +OPENCV_HAL_IMPL_VSX_BIN_OP(/, v_float32x4, vec_div) +OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_float64x2, vec_add) +OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_float64x2, vec_sub) +OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_float64x2, vec_mul) +OPENCV_HAL_IMPL_VSX_BIN_OP(/, v_float64x2, vec_div) +OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint64x2, vec_add) +OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint64x2, vec_sub) +OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int64x2, vec_add) +OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int64x2, vec_sub) + +// saturating multiply +#define OPENCV_HAL_IMPL_VSX_MUL_SAT(_Tpvec, _Tpwvec) \ + inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \ + { \ + _Tpwvec c, d; \ + v_mul_expand(a, b, c, d); \ + return v_pack(c, d); \ + } \ + inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \ + { a = a * b; return a; } + +OPENCV_HAL_IMPL_VSX_MUL_SAT(v_int8x16, v_int16x8) +OPENCV_HAL_IMPL_VSX_MUL_SAT(v_uint8x16, v_uint16x8) +OPENCV_HAL_IMPL_VSX_MUL_SAT(v_int16x8, v_int32x4) +OPENCV_HAL_IMPL_VSX_MUL_SAT(v_uint16x8, v_uint32x4) + +template +inline void v_mul_expand(const Tvec& a, const Tvec& b, Twvec& c, Twvec& d) +{ + Twvec p0 = Twvec(vec_mule(a.val, b.val)); + Twvec p1 = Twvec(vec_mulo(a.val, b.val)); + v_zip(p0, p1, c, d); +} + +inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) +{ + vec_int4 p0 = vec_mule(a.val, b.val); + vec_int4 p1 = vec_mulo(a.val, b.val); + static const vec_uchar16 perm = {2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31}; + return v_int16x8(vec_perm(vec_short8_c(p0), vec_short8_c(p1), perm)); +} +inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) +{ + vec_uint4 p0 = vec_mule(a.val, b.val); + vec_uint4 p1 = vec_mulo(a.val, b.val); + static const vec_uchar16 perm = {2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31}; + return v_uint16x8(vec_perm(vec_ushort8_c(p0), vec_ushort8_c(p1), perm)); +} + +/** Non-saturating arithmetics **/ +#define OPENCV_HAL_IMPL_VSX_BIN_FUNC(func, intrin) \ +template \ +inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(intrin(a.val, b.val)); } + +OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_add_wrap, vec_add) +OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_sub_wrap, vec_sub) +OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_mul_wrap, vec_mul) + +/** Bitwise shifts **/ +#define OPENCV_HAL_IMPL_VSX_SHIFT_OP(_Tpvec, shr, splfunc) \ +inline _Tpvec operator << (const _Tpvec& a, int imm) \ +{ return _Tpvec(vec_sl(a.val, splfunc(imm))); } \ +inline _Tpvec operator >> (const _Tpvec& a, int imm) \ +{ return _Tpvec(shr(a.val, splfunc(imm))); } \ +template inline _Tpvec v_shl(const _Tpvec& a) \ +{ return _Tpvec(vec_sl(a.val, splfunc(imm))); } \ +template inline _Tpvec v_shr(const _Tpvec& a) \ +{ return _Tpvec(shr(a.val, splfunc(imm))); } + +OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint8x16, vec_sr, vec_uchar16_sp) +OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint16x8, vec_sr, vec_ushort8_sp) +OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint32x4, vec_sr, vec_uint4_sp) +OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint64x2, vec_sr, vec_udword2_sp) +// algebraic right shift +OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int8x16, vec_sra, vec_uchar16_sp) +OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int16x8, vec_sra, vec_ushort8_sp) +OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int32x4, vec_sra, vec_uint4_sp) +OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int64x2, vec_sra, vec_udword2_sp) + +/** Bitwise logic **/ +#define OPENCV_HAL_IMPL_VSX_LOGIC_OP(_Tpvec) \ +OPENCV_HAL_IMPL_VSX_BIN_OP(&, _Tpvec, vec_and) \ +OPENCV_HAL_IMPL_VSX_BIN_OP(|, _Tpvec, vec_or) \ +OPENCV_HAL_IMPL_VSX_BIN_OP(^, _Tpvec, vec_xor) \ +inline _Tpvec operator ~ (const _Tpvec& a) \ +{ return _Tpvec(vec_not(a.val)); } + +OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint8x16) +OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int8x16) +OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint16x8) +OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int16x8) +OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint32x4) +OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int32x4) +OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint64x2) +OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int64x2) +OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_float32x4) +OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_float64x2) + +/** Bitwise select **/ +#define OPENCV_HAL_IMPL_VSX_SELECT(_Tpvec, cast) \ +inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(vec_sel(b.val, a.val, cast(mask.val))); } + +OPENCV_HAL_IMPL_VSX_SELECT(v_uint8x16, vec_bchar16_c) +OPENCV_HAL_IMPL_VSX_SELECT(v_int8x16, vec_bchar16_c) +OPENCV_HAL_IMPL_VSX_SELECT(v_uint16x8, vec_bshort8_c) +OPENCV_HAL_IMPL_VSX_SELECT(v_int16x8, vec_bshort8_c) +OPENCV_HAL_IMPL_VSX_SELECT(v_uint32x4, vec_bint4_c) +OPENCV_HAL_IMPL_VSX_SELECT(v_int32x4, vec_bint4_c) +OPENCV_HAL_IMPL_VSX_SELECT(v_float32x4, vec_bint4_c) +OPENCV_HAL_IMPL_VSX_SELECT(v_float64x2, vec_bdword2_c) + +/** Comparison **/ +#define OPENCV_HAL_IMPL_VSX_INT_CMP_OP(_Tpvec) \ +inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(vec_cmpeq(a.val, b.val)); } \ +inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(vec_cmpne(a.val, b.val)); } \ +inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(vec_cmplt(a.val, b.val)); } \ +inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(vec_cmpgt(a.val, b.val)); } \ +inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(vec_cmple(a.val, b.val)); } \ +inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(vec_cmpge(a.val, b.val)); } + +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint8x16) +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int8x16) +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint16x8) +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int16x8) +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint32x4) +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int32x4) +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_float32x4) +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_float64x2) +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint64x2) +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int64x2) + +inline v_float32x4 v_not_nan(const v_float32x4& a) +{ return v_float32x4(vec_cmpeq(a.val, a.val)); } +inline v_float64x2 v_not_nan(const v_float64x2& a) +{ return v_float64x2(vec_cmpeq(a.val, a.val)); } + +/** min/max **/ +OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_min, vec_min) +OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_max, vec_max) + +/** Rotate **/ +#define OPENCV_IMPL_VSX_ROTATE(_Tpvec, suffix, shf, cast) \ +template \ +inline _Tpvec v_rotate_##suffix(const _Tpvec& a) \ +{ \ + const int wd = imm * sizeof(typename _Tpvec::lane_type); \ + if (wd > 15) \ + return _Tpvec(); \ + return _Tpvec((cast)shf(vec_uchar16_c(a.val), vec_uchar16_sp(wd << 3))); \ +} + +#define OPENCV_IMPL_VSX_ROTATE_LR(_Tpvec, cast) \ +OPENCV_IMPL_VSX_ROTATE(_Tpvec, left, vec_slo, cast) \ +OPENCV_IMPL_VSX_ROTATE(_Tpvec, right, vec_sro, cast) + +OPENCV_IMPL_VSX_ROTATE_LR(v_uint8x16, vec_uchar16) +OPENCV_IMPL_VSX_ROTATE_LR(v_int8x16, vec_char16) +OPENCV_IMPL_VSX_ROTATE_LR(v_uint16x8, vec_ushort8) +OPENCV_IMPL_VSX_ROTATE_LR(v_int16x8, vec_short8) +OPENCV_IMPL_VSX_ROTATE_LR(v_uint32x4, vec_uint4) +OPENCV_IMPL_VSX_ROTATE_LR(v_int32x4, vec_int4) +OPENCV_IMPL_VSX_ROTATE_LR(v_float32x4, vec_float4) +OPENCV_IMPL_VSX_ROTATE_LR(v_uint64x2, vec_udword2) +OPENCV_IMPL_VSX_ROTATE_LR(v_int64x2, vec_dword2) +OPENCV_IMPL_VSX_ROTATE_LR(v_float64x2, vec_double2) + +template +inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) +{ + enum { CV_SHIFT = 16 - imm * (sizeof(typename _Tpvec::lane_type)) }; + if (CV_SHIFT == 16) + return a; +#ifdef __IBMCPP__ + return _Tpvec(vec_sld(b.val, a.val, CV_SHIFT & 15)); +#else + return _Tpvec(vec_sld(b.val, a.val, CV_SHIFT)); +#endif +} + +template +inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) +{ + enum { CV_SHIFT = imm * (sizeof(typename _Tpvec::lane_type)) }; + if (CV_SHIFT == 16) + return b; + return _Tpvec(vec_sld(a.val, b.val, CV_SHIFT)); +} + +#define OPENCV_IMPL_VSX_ROTATE_64_2RG(_Tpvec, suffix, rg1, rg2) \ +template \ +inline _Tpvec v_rotate_##suffix(const _Tpvec& a, const _Tpvec& b) \ +{ \ + if (imm == 1) \ + return _Tpvec(vec_permi(rg1.val, rg2.val, 2)); \ + return imm ? b : a; \ +} + +#define OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(_Tpvec) \ +OPENCV_IMPL_VSX_ROTATE_64_2RG(_Tpvec, left, b, a) \ +OPENCV_IMPL_VSX_ROTATE_64_2RG(_Tpvec, right, a, b) + +OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_float64x2) +OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_uint64x2) +OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_int64x2) + +/* Reverse */ +inline v_uint8x16 v_reverse(const v_uint8x16 &a) +{ + static const vec_uchar16 perm = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; + vec_uchar16 vec = (vec_uchar16)a.val; + return v_uint8x16(vec_perm(vec, vec, perm)); +} + +inline v_int8x16 v_reverse(const v_int8x16 &a) +{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); } + +inline v_uint16x8 v_reverse(const v_uint16x8 &a) +{ + static const vec_uchar16 perm = {14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1}; + vec_uchar16 vec = (vec_uchar16)a.val; + return v_reinterpret_as_u16(v_uint8x16(vec_perm(vec, vec, perm))); +} + +inline v_int16x8 v_reverse(const v_int16x8 &a) +{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } + +inline v_uint32x4 v_reverse(const v_uint32x4 &a) +{ + static const vec_uchar16 perm = {12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3}; + vec_uchar16 vec = (vec_uchar16)a.val; + return v_reinterpret_as_u32(v_uint8x16(vec_perm(vec, vec, perm))); +} + +inline v_int32x4 v_reverse(const v_int32x4 &a) +{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_float32x4 v_reverse(const v_float32x4 &a) +{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_uint64x2 v_reverse(const v_uint64x2 &a) +{ + static const vec_uchar16 perm = {8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7}; + vec_uchar16 vec = (vec_uchar16)a.val; + return v_reinterpret_as_u64(v_uint8x16(vec_perm(vec, vec, perm))); +} + +inline v_int64x2 v_reverse(const v_int64x2 &a) +{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } + +inline v_float64x2 v_reverse(const v_float64x2 &a) +{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } + +/* Extract */ +template +inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) +{ return v_rotate_right(a, b); } + +////////// Reduce and mask ///////// + +/** Reduce **/ +inline uint v_reduce_sum(const v_uint8x16& a) +{ + const vec_uint4 zero4 = vec_uint4_z; + vec_uint4 sum4 = vec_sum4s(a.val, zero4); + return (uint)vec_extract(vec_sums(vec_int4_c(sum4), vec_int4_c(zero4)), 3); +} +inline int v_reduce_sum(const v_int8x16& a) +{ + const vec_int4 zero4 = vec_int4_z; + vec_int4 sum4 = vec_sum4s(a.val, zero4); + return (int)vec_extract(vec_sums(sum4, zero4), 3); +} +inline int v_reduce_sum(const v_int16x8& a) +{ + const vec_int4 zero = vec_int4_z; + return saturate_cast(vec_extract(vec_sums(vec_sum4s(a.val, zero), zero), 3)); +} +inline uint v_reduce_sum(const v_uint16x8& a) +{ + const vec_int4 v4 = vec_int4_c(vec_unpackhu(vec_adds(a.val, vec_sld(a.val, a.val, 8)))); + return saturate_cast(vec_extract(vec_sums(v4, vec_int4_z), 3)); +} + +#define OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(_Tpvec, _Tpvec2, scalartype, suffix, func) \ +inline scalartype v_reduce_##suffix(const _Tpvec& a) \ +{ \ + const _Tpvec2 rs = func(a.val, vec_sld(a.val, a.val, 8)); \ + return vec_extract(func(rs, vec_sld(rs, rs, 4)), 0); \ +} +OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_uint32x4, vec_uint4, uint, sum, vec_add) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_uint32x4, vec_uint4, uint, max, vec_max) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_uint32x4, vec_uint4, uint, min, vec_min) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_int32x4, vec_int4, int, sum, vec_add) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_int32x4, vec_int4, int, max, vec_max) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_int32x4, vec_int4, int, min, vec_min) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, sum, vec_add) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, max, vec_max) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, min, vec_min) + +inline uint64 v_reduce_sum(const v_uint64x2& a) +{ + return vec_extract(vec_add(a.val, vec_permi(a.val, a.val, 3)), 0); +} +inline int64 v_reduce_sum(const v_int64x2& a) +{ + return vec_extract(vec_add(a.val, vec_permi(a.val, a.val, 3)), 0); +} +inline double v_reduce_sum(const v_float64x2& a) +{ + return vec_extract(vec_add(a.val, vec_permi(a.val, a.val, 3)), 0); +} + +#define OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(_Tpvec, _Tpvec2, scalartype, suffix, func) \ +inline scalartype v_reduce_##suffix(const _Tpvec& a) \ +{ \ + _Tpvec2 rs = func(a.val, vec_sld(a.val, a.val, 8)); \ + rs = func(rs, vec_sld(rs, rs, 4)); \ + return vec_extract(func(rs, vec_sld(rs, rs, 2)), 0); \ +} +OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_uint16x8, vec_ushort8, ushort, max, vec_max) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_uint16x8, vec_ushort8, ushort, min, vec_min) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_int16x8, vec_short8, short, max, vec_max) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_int16x8, vec_short8, short, min, vec_min) + +#define OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(_Tpvec, _Tpvec2, scalartype, suffix, func) \ +inline scalartype v_reduce_##suffix(const _Tpvec& a) \ +{ \ + _Tpvec2 rs = func(a.val, vec_sld(a.val, a.val, 8)); \ + rs = func(rs, vec_sld(rs, rs, 4)); \ + rs = func(rs, vec_sld(rs, rs, 2)); \ + return vec_extract(func(rs, vec_sld(rs, rs, 1)), 0); \ +} +OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(v_uint8x16, vec_uchar16, uchar, max, vec_max) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(v_uint8x16, vec_uchar16, uchar, min, vec_min) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(v_int8x16, vec_char16, schar, max, vec_max) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(v_int8x16, vec_char16, schar, min, vec_min) + +inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d) +{ + vec_float4 ac = vec_add(vec_mergel(a.val, c.val), vec_mergeh(a.val, c.val)); + ac = vec_add(ac, vec_sld(ac, ac, 8)); + + vec_float4 bd = vec_add(vec_mergel(b.val, d.val), vec_mergeh(b.val, d.val)); + bd = vec_add(bd, vec_sld(bd, bd, 8)); + return v_float32x4(vec_mergeh(ac, bd)); +} + +inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b) +{ + const vec_uint4 zero4 = vec_uint4_z; + vec_uint4 sum4 = vec_sum4s(vec_absd(a.val, b.val), zero4); + return (unsigned)vec_extract(vec_sums(vec_int4_c(sum4), vec_int4_c(zero4)), 3); +} +inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b) +{ + const vec_int4 zero4 = vec_int4_z; + vec_char16 ad = vec_abss(vec_subs(a.val, b.val)); + vec_int4 sum4 = vec_sum4s(ad, zero4); + return (unsigned)vec_extract(vec_sums(sum4, zero4), 3); +} +inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b) +{ + vec_ushort8 ad = vec_absd(a.val, b.val); + VSX_UNUSED(vec_int4) sum = vec_sums(vec_int4_c(vec_unpackhu(ad)) + vec_int4_c(vec_unpacklu(ad)), vec_int4_z); + return (unsigned)vec_extract(sum, 3); +} +inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b) +{ + const vec_int4 zero4 = vec_int4_z; + vec_short8 ad = vec_abss(vec_subs(a.val, b.val)); + vec_int4 sum4 = vec_sum4s(ad, zero4); + return (unsigned)vec_extract(vec_sums(sum4, zero4), 3); +} +inline unsigned v_reduce_sad(const v_uint32x4& a, const v_uint32x4& b) +{ + const vec_uint4 ad = vec_absd(a.val, b.val); + const vec_uint4 rd = vec_add(ad, vec_sld(ad, ad, 8)); + return vec_extract(vec_add(rd, vec_sld(rd, rd, 4)), 0); +} +inline unsigned v_reduce_sad(const v_int32x4& a, const v_int32x4& b) +{ + vec_int4 ad = vec_abss(vec_sub(a.val, b.val)); + return (unsigned)vec_extract(vec_sums(ad, vec_int4_z), 3); +} +inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b) +{ + const vec_float4 ad = vec_abs(vec_sub(a.val, b.val)); + const vec_float4 rd = vec_add(ad, vec_sld(ad, ad, 8)); + return vec_extract(vec_add(rd, vec_sld(rd, rd, 4)), 0); +} + +/** Popcount **/ +inline v_uint8x16 v_popcount(const v_uint8x16& a) +{ return v_uint8x16(vec_popcntu(a.val)); } +inline v_uint8x16 v_popcount(const v_int8x16& a) +{ return v_uint8x16(vec_popcntu(a.val)); } +inline v_uint16x8 v_popcount(const v_uint16x8& a) +{ return v_uint16x8(vec_popcntu(a.val)); } +inline v_uint16x8 v_popcount(const v_int16x8& a) +{ return v_uint16x8(vec_popcntu(a.val)); } +inline v_uint32x4 v_popcount(const v_uint32x4& a) +{ return v_uint32x4(vec_popcntu(a.val)); } +inline v_uint32x4 v_popcount(const v_int32x4& a) +{ return v_uint32x4(vec_popcntu(a.val)); } +inline v_uint64x2 v_popcount(const v_uint64x2& a) +{ return v_uint64x2(vec_popcntu(a.val)); } +inline v_uint64x2 v_popcount(const v_int64x2& a) +{ return v_uint64x2(vec_popcntu(a.val)); } + +/** Mask **/ +inline int v_signmask(const v_uint8x16& a) +{ + static const vec_uchar16 qperm = {120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0}; + return vec_extract((vec_int4)vec_vbpermq(v_reinterpret_as_u8(a).val, qperm), 2); +} +inline int v_signmask(const v_int8x16& a) +{ return v_signmask(v_reinterpret_as_u8(a)); } + +inline int v_signmask(const v_int16x8& a) +{ + static const vec_uchar16 qperm = {112, 96, 80, 64, 48, 32, 16, 0, 128, 128, 128, 128, 128, 128, 128, 128}; + return vec_extract((vec_int4)vec_vbpermq(v_reinterpret_as_u8(a).val, qperm), 2); +} +inline int v_signmask(const v_uint16x8& a) +{ return v_signmask(v_reinterpret_as_s16(a)); } + +inline int v_signmask(const v_int32x4& a) +{ + static const vec_uchar16 qperm = {96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}; + return vec_extract((vec_int4)vec_vbpermq(v_reinterpret_as_u8(a).val, qperm), 2); +} +inline int v_signmask(const v_uint32x4& a) +{ return v_signmask(v_reinterpret_as_s32(a)); } +inline int v_signmask(const v_float32x4& a) +{ return v_signmask(v_reinterpret_as_s32(a)); } + +inline int v_signmask(const v_int64x2& a) +{ + VSX_UNUSED(const vec_dword2) sv = vec_sr(a.val, vec_udword2_sp(63)); + return (int)vec_extract(sv, 0) | (int)vec_extract(sv, 1) << 1; +} +inline int v_signmask(const v_uint64x2& a) +{ return v_signmask(v_reinterpret_as_s64(a)); } +inline int v_signmask(const v_float64x2& a) +{ return v_signmask(v_reinterpret_as_s64(a)); } + +inline int v_scan_forward(const v_int8x16& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint8x16& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_int16x8& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint16x8& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_int32x4& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint32x4& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_float32x4& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_int64x2& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint64x2& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_float64x2& a) { return trailingZeros32(v_signmask(a)); } + +template +inline bool v_check_all(const _Tpvec& a) +{ return vec_all_lt(a.val, _Tpvec().val); } +inline bool v_check_all(const v_uint8x16& a) +{ return v_check_all(v_reinterpret_as_s8(a)); } +inline bool v_check_all(const v_uint16x8& a) +{ return v_check_all(v_reinterpret_as_s16(a)); } +inline bool v_check_all(const v_uint32x4& a) +{ return v_check_all(v_reinterpret_as_s32(a)); } +inline bool v_check_all(const v_uint64x2& a) +{ return v_check_all(v_reinterpret_as_s64(a)); } +inline bool v_check_all(const v_float32x4& a) +{ return v_check_all(v_reinterpret_as_s32(a)); } +inline bool v_check_all(const v_float64x2& a) +{ return v_check_all(v_reinterpret_as_s64(a)); } + +template +inline bool v_check_any(const _Tpvec& a) +{ return vec_any_lt(a.val, _Tpvec().val); } +inline bool v_check_any(const v_uint8x16& a) +{ return v_check_any(v_reinterpret_as_s8(a)); } +inline bool v_check_any(const v_uint16x8& a) +{ return v_check_any(v_reinterpret_as_s16(a)); } +inline bool v_check_any(const v_uint32x4& a) +{ return v_check_any(v_reinterpret_as_s32(a)); } +inline bool v_check_any(const v_uint64x2& a) +{ return v_check_any(v_reinterpret_as_s64(a)); } +inline bool v_check_any(const v_float32x4& a) +{ return v_check_any(v_reinterpret_as_s32(a)); } +inline bool v_check_any(const v_float64x2& a) +{ return v_check_any(v_reinterpret_as_s64(a)); } + +////////// Other math ///////// + +/** Some frequent operations **/ +inline v_float32x4 v_sqrt(const v_float32x4& x) +{ return v_float32x4(vec_sqrt(x.val)); } +inline v_float64x2 v_sqrt(const v_float64x2& x) +{ return v_float64x2(vec_sqrt(x.val)); } + +inline v_float32x4 v_invsqrt(const v_float32x4& x) +{ return v_float32x4(vec_rsqrt(x.val)); } +inline v_float64x2 v_invsqrt(const v_float64x2& x) +{ return v_float64x2(vec_rsqrt(x.val)); } + +#define OPENCV_HAL_IMPL_VSX_MULADD(_Tpvec) \ +inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(vec_sqrt(vec_madd(a.val, a.val, vec_mul(b.val, b.val)))); } \ +inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(vec_madd(a.val, a.val, vec_mul(b.val, b.val))); } \ +inline _Tpvec v_fma(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \ +{ return _Tpvec(vec_madd(a.val, b.val, c.val)); } \ +inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \ +{ return _Tpvec(vec_madd(a.val, b.val, c.val)); } + +OPENCV_HAL_IMPL_VSX_MULADD(v_float32x4) +OPENCV_HAL_IMPL_VSX_MULADD(v_float64x2) + +inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ return a * b + c; } + +// TODO: exp, log, sin, cos + +/** Absolute values **/ +inline v_uint8x16 v_abs(const v_int8x16& x) +{ return v_uint8x16(vec_uchar16_c(vec_abs(x.val))); } + +inline v_uint16x8 v_abs(const v_int16x8& x) +{ return v_uint16x8(vec_ushort8_c(vec_abs(x.val))); } + +inline v_uint32x4 v_abs(const v_int32x4& x) +{ return v_uint32x4(vec_uint4_c(vec_abs(x.val))); } + +inline v_float32x4 v_abs(const v_float32x4& x) +{ return v_float32x4(vec_abs(x.val)); } + +inline v_float64x2 v_abs(const v_float64x2& x) +{ return v_float64x2(vec_abs(x.val)); } + +/** Absolute difference **/ +// unsigned +OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_absdiff, vec_absd) + +inline v_uint8x16 v_absdiff(const v_int8x16& a, const v_int8x16& b) +{ return v_reinterpret_as_u8(v_sub_wrap(v_max(a, b), v_min(a, b))); } +inline v_uint16x8 v_absdiff(const v_int16x8& a, const v_int16x8& b) +{ return v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b))); } +inline v_uint32x4 v_absdiff(const v_int32x4& a, const v_int32x4& b) +{ return v_reinterpret_as_u32(v_max(a, b) - v_min(a, b)); } + +inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b) +{ return v_abs(a - b); } +inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b) +{ return v_abs(a - b); } + +/** Absolute difference for signed integers **/ +inline v_int8x16 v_absdiffs(const v_int8x16& a, const v_int8x16& b) +{ return v_int8x16(vec_abss(vec_subs(a.val, b.val))); } +inline v_int16x8 v_absdiffs(const v_int16x8& a, const v_int16x8& b) +{ return v_int16x8(vec_abss(vec_subs(a.val, b.val))); } + +////////// Conversions ///////// + +/** Rounding **/ +inline v_int32x4 v_round(const v_float32x4& a) +{ return v_int32x4(vec_cts(vec_rint(a.val))); } + +inline v_int32x4 v_round(const v_float64x2& a) +{ return v_int32x4(vec_mergesqo(vec_ctso(vec_rint(a.val)), vec_int4_z)); } + +inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) +{ return v_int32x4(vec_mergesqo(vec_ctso(vec_rint(a.val)), vec_ctso(vec_rint(b.val)))); } + +inline v_int32x4 v_floor(const v_float32x4& a) +{ return v_int32x4(vec_cts(vec_floor(a.val))); } + +inline v_int32x4 v_floor(const v_float64x2& a) +{ return v_int32x4(vec_mergesqo(vec_ctso(vec_floor(a.val)), vec_int4_z)); } + +inline v_int32x4 v_ceil(const v_float32x4& a) +{ return v_int32x4(vec_cts(vec_ceil(a.val))); } + +inline v_int32x4 v_ceil(const v_float64x2& a) +{ return v_int32x4(vec_mergesqo(vec_ctso(vec_ceil(a.val)), vec_int4_z)); } + +inline v_int32x4 v_trunc(const v_float32x4& a) +{ return v_int32x4(vec_cts(a.val)); } + +inline v_int32x4 v_trunc(const v_float64x2& a) +{ return v_int32x4(vec_mergesqo(vec_ctso(a.val), vec_int4_z)); } + +/** To float **/ +inline v_float32x4 v_cvt_f32(const v_int32x4& a) +{ return v_float32x4(vec_ctf(a.val)); } + +inline v_float32x4 v_cvt_f32(const v_float64x2& a) +{ return v_float32x4(vec_mergesqo(vec_cvfo(a.val), vec_float4_z)); } + +inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) +{ return v_float32x4(vec_mergesqo(vec_cvfo(a.val), vec_cvfo(b.val))); } + +inline v_float64x2 v_cvt_f64(const v_int32x4& a) +{ return v_float64x2(vec_ctdo(vec_mergeh(a.val, a.val))); } + +inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) +{ return v_float64x2(vec_ctdo(vec_mergel(a.val, a.val))); } + +inline v_float64x2 v_cvt_f64(const v_float32x4& a) +{ return v_float64x2(vec_cvfo(vec_mergeh(a.val, a.val))); } + +inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) +{ return v_float64x2(vec_cvfo(vec_mergel(a.val, a.val))); } + +inline v_float64x2 v_cvt_f64(const v_int64x2& a) +{ return v_float64x2(vec_ctd(a.val)); } + +////////////// Lookup table access //////////////////// + +inline v_int8x16 v_lut(const schar* tab, const int* idx) +{ + return v_int8x16(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]], tab[idx[4]], tab[idx[5]], tab[idx[6]], tab[idx[7]], + tab[idx[8]], tab[idx[9]], tab[idx[10]], tab[idx[11]], tab[idx[12]], tab[idx[13]], tab[idx[14]], tab[idx[15]]); +} +inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx) +{ + return v_reinterpret_as_s8(v_int16x8(*(const short*)(tab+idx[0]), *(const short*)(tab+idx[1]), *(const short*)(tab+idx[2]), *(const short*)(tab+idx[3]), + *(const short*)(tab+idx[4]), *(const short*)(tab+idx[5]), *(const short*)(tab+idx[6]), *(const short*)(tab+idx[7]))); +} +inline v_int8x16 v_lut_quads(const schar* tab, const int* idx) +{ + return v_reinterpret_as_s8(v_int32x4(*(const int*)(tab+idx[0]), *(const int*)(tab+idx[1]), *(const int*)(tab+idx[2]), *(const int*)(tab+idx[3]))); +} +inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((const schar*)tab, idx)); } +inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((const schar*)tab, idx)); } +inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((const schar*)tab, idx)); } + +inline v_int16x8 v_lut(const short* tab, const int* idx) +{ + return v_int16x8(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]], tab[idx[4]], tab[idx[5]], tab[idx[6]], tab[idx[7]]); +} +inline v_int16x8 v_lut_pairs(const short* tab, const int* idx) +{ + return v_reinterpret_as_s16(v_int32x4(*(const int*)(tab + idx[0]), *(const int*)(tab + idx[1]), *(const int*)(tab + idx[2]), *(const int*)(tab + idx[3]))); +} +inline v_int16x8 v_lut_quads(const short* tab, const int* idx) +{ + return v_reinterpret_as_s16(v_int64x2(*(const int64*)(tab + idx[0]), *(const int64*)(tab + idx[1]))); +} +inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((const short*)tab, idx)); } +inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((const short*)tab, idx)); } +inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((const short*)tab, idx)); } + +inline v_int32x4 v_lut(const int* tab, const int* idx) +{ + return v_int32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); +} +inline v_int32x4 v_lut_pairs(const int* tab, const int* idx) +{ + return v_reinterpret_as_s32(v_int64x2(*(const int64*)(tab + idx[0]), *(const int64*)(tab + idx[1]))); +} +inline v_int32x4 v_lut_quads(const int* tab, const int* idx) +{ + return v_int32x4(vsx_ld(0, tab + idx[0])); +} +inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((const int*)tab, idx)); } +inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((const int*)tab, idx)); } +inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((const int*)tab, idx)); } + +inline v_int64x2 v_lut(const int64_t* tab, const int* idx) +{ + return v_int64x2(tab[idx[0]], tab[idx[1]]); +} +inline v_int64x2 v_lut_pairs(const int64_t* tab, const int* idx) +{ + return v_int64x2(vsx_ld2(0, tab + idx[0])); +} +inline v_uint64x2 v_lut(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); } +inline v_uint64x2 v_lut_pairs(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); } + +inline v_float32x4 v_lut(const float* tab, const int* idx) +{ + return v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); +} +inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) { return v_reinterpret_as_f32(v_lut_pairs((const int*)tab, idx)); } +inline v_float32x4 v_lut_quads(const float* tab, const int* idx) { return v_load(tab + *idx); } + +inline v_float64x2 v_lut(const double* tab, const int* idx) +{ + return v_float64x2(tab[idx[0]], tab[idx[1]]); +} +inline v_float64x2 v_lut_pairs(const double* tab, const int* idx) { return v_load(tab + *idx); } + +inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) +{ + const int idx[4] = { + vec_extract(idxvec.val, 0), + vec_extract(idxvec.val, 1), + vec_extract(idxvec.val, 2), + vec_extract(idxvec.val, 3) + }; + return v_int32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); +} + +inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) +{ + const int idx[4] = { + vec_extract(idxvec.val, 0), + vec_extract(idxvec.val, 1), + vec_extract(idxvec.val, 2), + vec_extract(idxvec.val, 3) + }; + return v_uint32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); +} + +inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) +{ + const int idx[4] = { + vec_extract(idxvec.val, 0), + vec_extract(idxvec.val, 1), + vec_extract(idxvec.val, 2), + vec_extract(idxvec.val, 3) + }; + return v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); +} + +inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) +{ + const int idx[2] = { + vec_extract(idxvec.val, 0), + vec_extract(idxvec.val, 1) + }; + return v_float64x2(tab[idx[0]], tab[idx[1]]); +} + +inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y) +{ + vec_float4 xy0 = vec_ld_l8(tab + vec_extract(idxvec.val, 0)); + vec_float4 xy1 = vec_ld_l8(tab + vec_extract(idxvec.val, 1)); + vec_float4 xy2 = vec_ld_l8(tab + vec_extract(idxvec.val, 2)); + vec_float4 xy3 = vec_ld_l8(tab + vec_extract(idxvec.val, 3)); + vec_float4 xy02 = vec_mergeh(xy0, xy2); // x0, x2, y0, y2 + vec_float4 xy13 = vec_mergeh(xy1, xy3); // x1, x3, y1, y3 + x.val = vec_mergeh(xy02, xy13); + y.val = vec_mergel(xy02, xy13); +} +inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y) +{ + vec_double2 xy0 = vsx_ld(vec_extract(idxvec.val, 0), tab); + vec_double2 xy1 = vsx_ld(vec_extract(idxvec.val, 1), tab); + x.val = vec_mergeh(xy0, xy1); + y.val = vec_mergel(xy0, xy1); +} + +inline v_int8x16 v_interleave_pairs(const v_int8x16& vec) +{ + static const vec_uchar16 perm = {0, 2, 1, 3, 4, 6, 5, 7, 8, 10, 9, 11, 12, 14, 13, 15}; + return v_int8x16(vec_perm(vec.val, vec.val, perm)); +} +inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec) +{ return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); } + +inline v_int8x16 v_interleave_quads(const v_int8x16& vec) +{ + static const vec_uchar16 perm = {0, 4, 1, 5, 2, 6, 3, 7, 8, 12, 9, 13, 10, 14, 11, 15}; + return v_int8x16(vec_perm(vec.val, vec.val, perm)); +} +inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec) +{ return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_interleave_pairs(const v_int16x8& vec) +{ + static const vec_uchar16 perm = {0,1, 4,5, 2,3, 6,7, 8,9, 12,13, 10,11, 14,15}; + return v_int16x8(vec_perm(vec.val, vec.val, perm)); +} +inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec) +{ return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); } + +inline v_int16x8 v_interleave_quads(const v_int16x8& vec) +{ + static const vec_uchar16 perm = {0,1, 8,9, 2,3, 10,11, 4,5, 12,13, 6,7, 14,15}; + return v_int16x8(vec_perm(vec.val, vec.val, perm)); +} +inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec) +{ return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); } + +inline v_int32x4 v_interleave_pairs(const v_int32x4& vec) +{ + static const vec_uchar16 perm = {0,1,2,3, 8,9,10,11, 4,5,6,7, 12,13,14,15}; + return v_int32x4(vec_perm(vec.val, vec.val, perm)); +} +inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec) +{ return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } +inline v_float32x4 v_interleave_pairs(const v_float32x4& vec) +{ return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } + +inline v_int8x16 v_pack_triplets(const v_int8x16& vec) +{ + static const vec_uchar16 perm = {0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15, 15, 15, 15}; + return v_int8x16(vec_perm(vec.val, vec.val, perm)); +} +inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) +{ return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_pack_triplets(const v_int16x8& vec) +{ + static const vec_uchar16 perm = {0,1, 2,3, 4,5, 8,9, 10,11, 12,13, 14,15, 14,15}; + return v_int16x8(vec_perm(vec.val, vec.val, perm)); +} +inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) +{ return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); } + +inline v_int32x4 v_pack_triplets(const v_int32x4& vec) +{ return vec; } +inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) +{ return vec; } +inline v_float32x4 v_pack_triplets(const v_float32x4& vec) +{ return vec; } + +/////// FP16 support //////// + +inline v_float32x4 v_load_expand(const float16_t* ptr) +{ + vec_ushort8 vf16 = vec_ld_l8((const ushort*)ptr); +#if CV_VSX3 && defined(vec_extract_fp_from_shorth) + return v_float32x4(vec_extract_fp_from_shorth(vf16)); +#elif CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM) + vec_float4 vf32; + __asm__ __volatile__ ("xvcvhpsp %x0,%x1" : "=wa" (vf32) : "wa" (vec_mergeh(vf16, vf16))); + return v_float32x4(vf32); +#else + const vec_int4 z = vec_int4_z, delta = vec_int4_sp(0x38000000); + const vec_int4 signmask = vec_int4_sp(0x80000000); + const vec_int4 maxexp = vec_int4_sp(0x7c000000); + const vec_float4 deltaf = vec_float4_c(vec_int4_sp(0x38800000)); + + vec_int4 bits = vec_int4_c(vec_mergeh(vec_short8_c(z), vec_short8_c(vf16))); + vec_int4 e = vec_and(bits, maxexp), sign = vec_and(bits, signmask); + vec_int4 t = vec_add(vec_sr(vec_xor(bits, sign), vec_uint4_sp(3)), delta); // ((h & 0x7fff) << 13) + delta + vec_int4 zt = vec_int4_c(vec_sub(vec_float4_c(vec_add(t, vec_int4_sp(1 << 23))), deltaf)); + + t = vec_add(t, vec_and(delta, vec_cmpeq(maxexp, e))); + vec_bint4 zmask = vec_cmpeq(e, z); + vec_int4 ft = vec_sel(t, zt, zmask); + return v_float32x4(vec_float4_c(vec_or(ft, sign))); +#endif +} + +inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +{ +// fixme: Is there any builtin op or intrinsic that cover "xvcvsphp"? +#if CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM) + vec_ushort8 vf16; + __asm__ __volatile__ ("xvcvsphp %x0,%x1" : "=wa" (vf16) : "wa" (v.val)); + vec_st_l8(vec_mergesqe(vf16, vf16), ptr); +#else + const vec_int4 signmask = vec_int4_sp(0x80000000); + const vec_int4 rval = vec_int4_sp(0x3f000000); + + vec_int4 t = vec_int4_c(v.val); + vec_int4 sign = vec_sra(vec_and(t, signmask), vec_uint4_sp(16)); + t = vec_and(vec_nor(signmask, signmask), t); + + vec_bint4 finitemask = vec_cmpgt(vec_int4_sp(0x47800000), t); + vec_bint4 isnan = vec_cmpgt(t, vec_int4_sp(0x7f800000)); + vec_int4 naninf = vec_sel(vec_int4_sp(0x7c00), vec_int4_sp(0x7e00), isnan); + vec_bint4 tinymask = vec_cmpgt(vec_int4_sp(0x38800000), t); + vec_int4 tt = vec_int4_c(vec_add(vec_float4_c(t), vec_float4_c(rval))); + tt = vec_sub(tt, rval); + vec_int4 odd = vec_and(vec_sr(t, vec_uint4_sp(13)), vec_int4_sp(1)); + vec_int4 nt = vec_add(t, vec_int4_sp(0xc8000fff)); + nt = vec_sr(vec_add(nt, odd), vec_uint4_sp(13)); + t = vec_sel(nt, tt, tinymask); + t = vec_sel(naninf, t, finitemask); + t = vec_or(t, sign); + vec_st_l8(vec_packs(t, t), ptr); +#endif +} + +inline void v_cleanup() {} + + +/** Reinterpret **/ +/** its up there with load and store operations **/ + +////////// Matrix operations ///////// + +//////// Dot Product //////// +// 16 >> 32 +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b) +{ return v_int32x4(vec_msum(a.val, b.val, vec_int4_z)); } +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ return v_int32x4(vec_msum(a.val, b.val, c.val)); } + +// 32 >> 64 +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b) +{ + vec_dword2 even = vec_mule(a.val, b.val); + vec_dword2 odd = vec_mulo(a.val, b.val); + return v_int64x2(vec_add(even, odd)); +} +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ return v_dotprod(a, b) + c; } + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ return v_uint32x4(vec_msum(a.val, b.val, c.val)); } +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b) +{ return v_uint32x4(vec_msum(a.val, b.val, vec_uint4_z)); } + +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b) +{ + const vec_ushort8 eight = vec_ushort8_sp(8); + vec_short8 a0 = vec_sra((vec_short8)vec_sld(a.val, a.val, 1), eight); // even + vec_short8 a1 = vec_sra((vec_short8)a.val, eight); // odd + vec_short8 b0 = vec_sra((vec_short8)vec_sld(b.val, b.val, 1), eight); + vec_short8 b1 = vec_sra((vec_short8)b.val, eight); + return v_int32x4(vec_msum(a0, b0, vec_msum(a1, b1, vec_int4_z))); +} + +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) +{ + const vec_ushort8 eight = vec_ushort8_sp(8); + vec_short8 a0 = vec_sra((vec_short8)vec_sld(a.val, a.val, 1), eight); // even + vec_short8 a1 = vec_sra((vec_short8)a.val, eight); // odd + vec_short8 b0 = vec_sra((vec_short8)vec_sld(b.val, b.val, 1), eight); + vec_short8 b1 = vec_sra((vec_short8)b.val, eight); + return v_int32x4(vec_msum(a0, b0, vec_msum(a1, b1, c.val))); +} + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b) +{ + const vec_uint4 zero = vec_uint4_z; + vec_uint4 even = vec_mule(a.val, b.val); + vec_uint4 odd = vec_mulo(a.val, b.val); + vec_udword2 e0 = (vec_udword2)vec_mergee(even, zero); + vec_udword2 e1 = (vec_udword2)vec_mergeo(even, zero); + vec_udword2 o0 = (vec_udword2)vec_mergee(odd, zero); + vec_udword2 o1 = (vec_udword2)vec_mergeo(odd, zero); + vec_udword2 s0 = vec_add(e0, o0); + vec_udword2 s1 = vec_add(e1, o1); + return v_uint64x2(vec_add(s0, s1)); +} +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b) +{ + v_int32x4 prod = v_dotprod(a, b); + v_int64x2 c, d; + v_expand(prod, c, d); + return v_int64x2(vec_add(vec_mergeh(c.val, d.val), vec_mergel(c.val, d.val))); +} +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +// 32 >> 64f +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b) +{ return v_cvt_f64(v_dotprod(a, b)); } +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +//////// Fast Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b) +{ return v_dotprod(a, b); } +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ return v_int32x4(vec_msum(a.val, b.val, vec_int4_z)) + c; } +// 32 >> 64 +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_dotprod(a, b); } +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ return v_dotprod(a, b, c); } + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b) +{ return v_dotprod_expand(a, b); } +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ return v_uint32x4(vec_msum(a.val, b.val, vec_uint4_z)) + c; } + +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b) +{ + vec_short8 a0 = vec_unpackh(a.val); + vec_short8 a1 = vec_unpackl(a.val); + vec_short8 b0 = vec_unpackh(b.val); + vec_short8 b1 = vec_unpackl(b.val); + return v_int32x4(vec_msum(a0, b0, vec_msum(a1, b1, vec_int4_z))); +} +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b) +{ return v_dotprod_expand(a, b); } +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ return v_dotprod_expand(a, b, c); } + +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b) +{ + v_int32x4 prod = v_dotprod(a, b); + v_int64x2 c, d; + v_expand(prod, c, d); + return c + d; +} +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +// 32 >> 64f +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_dotprod_expand(a, b); } +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand(a, b, c); } + +inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& m3) +{ + const vec_float4 v0 = vec_splat(v.val, 0); + const vec_float4 v1 = vec_splat(v.val, 1); + const vec_float4 v2 = vec_splat(v.val, 2); + VSX_UNUSED(const vec_float4) v3 = vec_splat(v.val, 3); + return v_float32x4(vec_madd(v0, m0.val, vec_madd(v1, m1.val, vec_madd(v2, m2.val, vec_mul(v3, m3.val))))); +} + +inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& a) +{ + const vec_float4 v0 = vec_splat(v.val, 0); + const vec_float4 v1 = vec_splat(v.val, 1); + const vec_float4 v2 = vec_splat(v.val, 2); + return v_float32x4(vec_madd(v0, m0.val, vec_madd(v1, m1.val, vec_madd(v2, m2.val, a.val)))); +} + +#define OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(_Tpvec, _Tpvec2) \ +inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, \ + const _Tpvec& a2, const _Tpvec& a3, \ + _Tpvec& b0, _Tpvec& b1, _Tpvec& b2, _Tpvec& b3) \ +{ \ + _Tpvec2 a02 = vec_mergeh(a0.val, a2.val); \ + _Tpvec2 a13 = vec_mergeh(a1.val, a3.val); \ + b0.val = vec_mergeh(a02, a13); \ + b1.val = vec_mergel(a02, a13); \ + a02 = vec_mergel(a0.val, a2.val); \ + a13 = vec_mergel(a1.val, a3.val); \ + b2.val = vec_mergeh(a02, a13); \ + b3.val = vec_mergel(a02, a13); \ +} +OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_uint32x4, vec_uint4) +OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_int32x4, vec_int4) +OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_float32x4, vec_float4) + +template +inline Tvec v_broadcast_element(const Tvec& v) +{ return Tvec(vec_splat(v.val, i)); } + + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END + +//! @endcond + +} + +#endif // OPENCV_HAL_VSX_HPP diff --git a/IPL/include/opencv/opencv2/core/hal/intrin_wasm.hpp b/IPL/include/opencv/opencv2/core/hal/intrin_wasm.hpp new file mode 100644 index 0000000..b8c250f --- /dev/null +++ b/IPL/include/opencv/opencv2/core/hal/intrin_wasm.hpp @@ -0,0 +1,4260 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_HAL_INTRIN_WASM_HPP +#define OPENCV_HAL_INTRIN_WASM_HPP + +#include +#include +#include +#include "opencv2/core/saturate.hpp" + +#define CV_SIMD128 1 +#define CV_SIMD128_64F 0 // Now all implementation of f64 use fallback, so disable it. +#define CV_SIMD128_FP16 0 + +namespace cv +{ + +//! @cond IGNORED + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + +#if (__EMSCRIPTEN_major__ * 1000000 + __EMSCRIPTEN_minor__ * 1000 + __EMSCRIPTEN_tiny__) < (1038046) +// handle renames: https://github.com/emscripten-core/emscripten/pull/9440 (https://github.com/emscripten-core/emscripten/commit/755d5b46cb84d0aa120c10981b11d05646c29673) +#define wasm_i32x4_trunc_saturate_f32x4 wasm_trunc_saturate_i32x4_f32x4 +#define wasm_u32x4_trunc_saturate_f32x4 wasm_trunc_saturate_u32x4_f32x4 +#define wasm_i64x2_trunc_saturate_f64x2 wasm_trunc_saturate_i64x2_f64x2 +#define wasm_u64x2_trunc_saturate_f64x2 wasm_trunc_saturate_u64x2_f64x2 +#define wasm_f32x4_convert_i32x4 wasm_convert_f32x4_i32x4 +#define wasm_f32x4_convert_u32x4 wasm_convert_f32x4_u32x4 +#define wasm_f64x2_convert_i64x2 wasm_convert_f64x2_i64x2 +#define wasm_f64x2_convert_u64x2 wasm_convert_f64x2_u64x2 +#endif // COMPATIBILITY: <1.38.46 + +///////// Types /////////// + +struct v_uint8x16 +{ + typedef uchar lane_type; + typedef v128_t vector_type; + enum { nlanes = 16 }; + + v_uint8x16() : val(wasm_i8x16_splat(0)) {} + explicit v_uint8x16(v128_t v) : val(v) {} + v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7, + uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15) + { + uchar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + val = wasm_v128_load(v); + } + uchar get0() const + { + return (uchar)wasm_i8x16_extract_lane(val, 0); + } + + v128_t val; +}; + +struct v_int8x16 +{ + typedef schar lane_type; + typedef v128_t vector_type; + enum { nlanes = 16 }; + + v_int8x16() : val(wasm_i8x16_splat(0)) {} + explicit v_int8x16(v128_t v) : val(v) {} + v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7, + schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15) + { + schar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + val = wasm_v128_load(v); + } + schar get0() const + { + return wasm_i8x16_extract_lane(val, 0); + } + + v128_t val; +}; + +struct v_uint16x8 +{ + typedef ushort lane_type; + typedef v128_t vector_type; + enum { nlanes = 8 }; + + v_uint16x8() : val(wasm_i16x8_splat(0)) {} + explicit v_uint16x8(v128_t v) : val(v) {} + v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7) + { + ushort v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + val = wasm_v128_load(v); + } + ushort get0() const + { + return (ushort)wasm_i16x8_extract_lane(val, 0); // wasm_u16x8_extract_lane() unimplemented yet + } + + v128_t val; +}; + +struct v_int16x8 +{ + typedef short lane_type; + typedef v128_t vector_type; + enum { nlanes = 8 }; + + v_int16x8() : val(wasm_i16x8_splat(0)) {} + explicit v_int16x8(v128_t v) : val(v) {} + v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7) + { + short v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + val = wasm_v128_load(v); + } + short get0() const + { + return wasm_i16x8_extract_lane(val, 0); + } + + v128_t val; +}; + +struct v_uint32x4 +{ + typedef unsigned lane_type; + typedef v128_t vector_type; + enum { nlanes = 4 }; + + v_uint32x4() : val(wasm_i32x4_splat(0)) {} + explicit v_uint32x4(v128_t v) : val(v) {} + v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) + { + unsigned v[] = {v0, v1, v2, v3}; + val = wasm_v128_load(v); + } + unsigned get0() const + { + return (unsigned)wasm_i32x4_extract_lane(val, 0); + } + + v128_t val; +}; + +struct v_int32x4 +{ + typedef int lane_type; + typedef v128_t vector_type; + enum { nlanes = 4 }; + + v_int32x4() : val(wasm_i32x4_splat(0)) {} + explicit v_int32x4(v128_t v) : val(v) {} + v_int32x4(int v0, int v1, int v2, int v3) + { + int v[] = {v0, v1, v2, v3}; + val = wasm_v128_load(v); + } + int get0() const + { + return wasm_i32x4_extract_lane(val, 0); + } + + v128_t val; +}; + +struct v_float32x4 +{ + typedef float lane_type; + typedef v128_t vector_type; + enum { nlanes = 4 }; + + v_float32x4() : val(wasm_f32x4_splat(0)) {} + explicit v_float32x4(v128_t v) : val(v) {} + v_float32x4(float v0, float v1, float v2, float v3) + { + float v[] = {v0, v1, v2, v3}; + val = wasm_v128_load(v); + } + float get0() const + { + return wasm_f32x4_extract_lane(val, 0); + } + + v128_t val; +}; + +struct v_uint64x2 +{ + typedef uint64 lane_type; + typedef v128_t vector_type; + enum { nlanes = 2 }; + +#ifdef __wasm_unimplemented_simd128__ + v_uint64x2() : val(wasm_i64x2_splat(0)) {} +#else + v_uint64x2() : val(wasm_i32x4_splat(0)) {} +#endif + explicit v_uint64x2(v128_t v) : val(v) {} + v_uint64x2(uint64 v0, uint64 v1) + { + uint64 v[] = {v0, v1}; + val = wasm_v128_load(v); + } + uint64 get0() const + { +#ifdef __wasm_unimplemented_simd128__ + return (uint64)wasm_i64x2_extract_lane(val, 0); +#else + uint64 des[2]; + wasm_v128_store(des, val); + return des[0]; +#endif + } + + v128_t val; +}; + +struct v_int64x2 +{ + typedef int64 lane_type; + typedef v128_t vector_type; + enum { nlanes = 2 }; + +#ifdef __wasm_unimplemented_simd128__ + v_int64x2() : val(wasm_i64x2_splat(0)) {} +#else + v_int64x2() : val(wasm_i32x4_splat(0)) {} +#endif + explicit v_int64x2(v128_t v) : val(v) {} + v_int64x2(int64 v0, int64 v1) + { + int64 v[] = {v0, v1}; + val = wasm_v128_load(v); + } + int64 get0() const + { +#ifdef __wasm_unimplemented_simd128__ + return wasm_i64x2_extract_lane(val, 0); +#else + int64 des[2]; + wasm_v128_store(des, val); + return des[0]; +#endif + } + + v128_t val; +}; + +struct v_float64x2 +{ + typedef double lane_type; + typedef v128_t vector_type; + enum { nlanes = 2 }; + +#ifdef __wasm_unimplemented_simd128__ + v_float64x2() : val(wasm_f64x2_splat(0)) {} +#else + v_float64x2() : val(wasm_f32x4_splat(0)) {} +#endif + explicit v_float64x2(v128_t v) : val(v) {} + v_float64x2(double v0, double v1) + { + double v[] = {v0, v1}; + val = wasm_v128_load(v); + } + double get0() const + { +#ifdef __wasm_unimplemented_simd128__ + return wasm_f64x2_extract_lane(val, 0); +#else + double des[2]; + wasm_v128_store(des, val); + return des[0]; +#endif + } + + v128_t val; +}; + +namespace fallback +{ + +template struct v_reg +{ + typedef _Tp lane_type; + enum { nlanes = n }; + + explicit v_reg(const _Tp* ptr) { for( int i = 0; i < n; i++ ) s[i] = ptr[i]; } + + v_reg(_Tp s0, _Tp s1) { s[0] = s0; s[1] = s1; } + + v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3) { s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; } + + v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, + _Tp s4, _Tp s5, _Tp s6, _Tp s7) + { + s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; + s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7; + } + + v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, + _Tp s4, _Tp s5, _Tp s6, _Tp s7, + _Tp s8, _Tp s9, _Tp s10, _Tp s11, + _Tp s12, _Tp s13, _Tp s14, _Tp s15) + { + s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; + s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7; + s[8] = s8; s[9] = s9; s[10] = s10; s[11] = s11; + s[12] = s12; s[13] = s13; s[14] = s14; s[15] = s15; + } + + v_reg() {} + + v_reg(const v_reg<_Tp, n> & r) + { + for( int i = 0; i < n; i++ ) + s[i] = r.s[i]; + } + + _Tp get0() const { return s[0]; } + + _Tp get(const int i) const { return s[i]; } + v_reg<_Tp, n> high() const + { + v_reg<_Tp, n> c; + int i; + for( i = 0; i < n/2; i++ ) + { + c.s[i] = s[i+(n/2)]; + c.s[i+(n/2)] = 0; + } + return c; + } + + static v_reg<_Tp, n> zero() + { + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = (_Tp)0; + return c; + } + + static v_reg<_Tp, n> all(_Tp s) + { + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = s; + return c; + } + + template v_reg<_Tp2, n2> reinterpret_as() const + { + size_t bytes = std::min(sizeof(_Tp2)*n2, sizeof(_Tp)*n); + v_reg<_Tp2, n2> c; + std::memcpy(&c.s[0], &s[0], bytes); + return c; + } + + v_reg(const cv::v_uint8x16& v) { wasm_v128_store(&s, v.val); } + v_reg(const cv::v_int8x16& v) { wasm_v128_store(&s, v.val); } + v_reg(const cv::v_uint16x8& v) { wasm_v128_store(&s, v.val); } + v_reg(const cv::v_int16x8& v) { wasm_v128_store(&s, v.val); } + v_reg(const cv::v_uint32x4& v) { wasm_v128_store(&s, v.val); } + v_reg(const cv::v_int32x4& v) { wasm_v128_store(&s, v.val); } + v_reg(const cv::v_float32x4& v) { wasm_v128_store(&s, v.val); } + v_reg(const cv::v_float64x2& v) { wasm_v128_store(&s, v.val); } + v_reg(const cv::v_uint64x2& v) { wasm_v128_store(&s, v.val); } + v_reg(const cv::v_int64x2& v) { wasm_v128_store(&s, v.val); } + + operator cv::v_uint8x16() const { return cv::v_uint8x16(wasm_v128_load(&s)); } + operator cv::v_int8x16() const { return cv::v_int8x16(wasm_v128_load(&s)); } + operator cv::v_uint16x8() const { return cv::v_uint16x8(wasm_v128_load(&s)); } + operator cv::v_int16x8() const { return cv::v_int16x8(wasm_v128_load(&s)); } + operator cv::v_uint32x4() const { return cv::v_uint32x4(wasm_v128_load(&s)); } + operator cv::v_int32x4() const { return cv::v_int32x4(wasm_v128_load(&s)); } + operator cv::v_float32x4() const { return cv::v_float32x4(wasm_v128_load(&s)); } + operator cv::v_float64x2() const { return cv::v_float64x2(wasm_v128_load(&s)); } + operator cv::v_uint64x2() const { return cv::v_uint64x2(wasm_v128_load(&s)); } + operator cv::v_int64x2() const { return cv::v_int64x2(wasm_v128_load(&s)); } + + _Tp s[n]; +}; + +typedef v_reg v_uint8x16; +typedef v_reg v_int8x16; +typedef v_reg v_uint16x8; +typedef v_reg v_int16x8; +typedef v_reg v_uint32x4; +typedef v_reg v_int32x4; +typedef v_reg v_float32x4; +typedef v_reg v_float64x2; +typedef v_reg v_uint64x2; +typedef v_reg v_int64x2; + +#define OPENCV_HAL_IMPL_BIN_OP(bin_op) \ +template inline v_reg<_Tp, n> \ + operator bin_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + v_reg<_Tp, n> c; \ + for( int i = 0; i < n; i++ ) \ + c.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \ + return c; \ +} \ +template inline v_reg<_Tp, n>& \ + operator bin_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + for( int i = 0; i < n; i++ ) \ + a.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \ + return a; \ +} + +OPENCV_HAL_IMPL_BIN_OP(+) +OPENCV_HAL_IMPL_BIN_OP(-) +OPENCV_HAL_IMPL_BIN_OP(*) +OPENCV_HAL_IMPL_BIN_OP(/) + +#define OPENCV_HAL_IMPL_BIT_OP(bit_op) \ +template inline v_reg<_Tp, n> operator bit_op \ + (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + v_reg<_Tp, n> c; \ + typedef typename V_TypeTraits<_Tp>::int_type itype; \ + for( int i = 0; i < n; i++ ) \ + c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \ + V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \ + return c; \ +} \ +template inline v_reg<_Tp, n>& operator \ + bit_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + typedef typename V_TypeTraits<_Tp>::int_type itype; \ + for( int i = 0; i < n; i++ ) \ + a.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \ + V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \ + return a; \ +} + +OPENCV_HAL_IMPL_BIT_OP(&) +OPENCV_HAL_IMPL_BIT_OP(|) +OPENCV_HAL_IMPL_BIT_OP(^) + +template inline v_reg<_Tp, n> operator ~ (const v_reg<_Tp, n>& a) +{ + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int(~V_TypeTraits<_Tp>::reinterpret_int(a.s[i])); + } + return c; +} + +#define OPENCV_HAL_IMPL_MATH_FUNC(func, cfunc, _Tp2) \ +template inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a) \ +{ \ + v_reg<_Tp2, n> c; \ + for( int i = 0; i < n; i++ ) \ + c.s[i] = cfunc(a.s[i]); \ + return c; \ +} + +OPENCV_HAL_IMPL_MATH_FUNC(v_sqrt, std::sqrt, _Tp) +OPENCV_HAL_IMPL_MATH_FUNC(v_sin, std::sin, _Tp) +OPENCV_HAL_IMPL_MATH_FUNC(v_cos, std::cos, _Tp) +OPENCV_HAL_IMPL_MATH_FUNC(v_exp, std::exp, _Tp) +OPENCV_HAL_IMPL_MATH_FUNC(v_log, std::log, _Tp) +OPENCV_HAL_IMPL_MATH_FUNC(v_abs, (typename V_TypeTraits<_Tp>::abs_type)std::abs, + typename V_TypeTraits<_Tp>::abs_type) +OPENCV_HAL_IMPL_MATH_FUNC(v_round, cvRound, int) +OPENCV_HAL_IMPL_MATH_FUNC(v_floor, cvFloor, int) +OPENCV_HAL_IMPL_MATH_FUNC(v_ceil, cvCeil, int) +OPENCV_HAL_IMPL_MATH_FUNC(v_trunc, int, int) + +#define OPENCV_HAL_IMPL_MINMAX_FUNC(func, cfunc) \ +template inline v_reg<_Tp, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + v_reg<_Tp, n> c; \ + for( int i = 0; i < n; i++ ) \ + c.s[i] = cfunc(a.s[i], b.s[i]); \ + return c; \ +} + +#define OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(func, cfunc) \ +template inline _Tp func(const v_reg<_Tp, n>& a) \ +{ \ + _Tp c = a.s[0]; \ + for( int i = 1; i < n; i++ ) \ + c = cfunc(c, a.s[i]); \ + return c; \ +} + +OPENCV_HAL_IMPL_MINMAX_FUNC(v_min, std::min) +OPENCV_HAL_IMPL_MINMAX_FUNC(v_max, std::max) +OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min, std::min) +OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max, std::max) + +static const unsigned char popCountTable[] = +{ + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, +}; + +template +inline v_reg::abs_type, n> v_popcount(const v_reg<_Tp, n>& a) +{ + v_reg::abs_type, n> b = v_reg::abs_type, n>::zero(); + for (int i = 0; i < (int)(n*sizeof(_Tp)); i++) + b.s[i/sizeof(_Tp)] += popCountTable[v_reinterpret_as_u8(a).s[i]]; + return b; +} + +template +inline void v_minmax( const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + v_reg<_Tp, n>& minval, v_reg<_Tp, n>& maxval ) +{ + for( int i = 0; i < n; i++ ) + { + minval.s[i] = std::min(a.s[i], b.s[i]); + maxval.s[i] = std::max(a.s[i], b.s[i]); + } +} + +#define OPENCV_HAL_IMPL_CMP_OP(cmp_op) \ +template \ +inline v_reg<_Tp, n> operator cmp_op(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + typedef typename V_TypeTraits<_Tp>::int_type itype; \ + v_reg<_Tp, n> c; \ + for( int i = 0; i < n; i++ ) \ + c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)-(int)(a.s[i] cmp_op b.s[i])); \ + return c; \ +} + +OPENCV_HAL_IMPL_CMP_OP(<) +OPENCV_HAL_IMPL_CMP_OP(>) +OPENCV_HAL_IMPL_CMP_OP(<=) +OPENCV_HAL_IMPL_CMP_OP(>=) +OPENCV_HAL_IMPL_CMP_OP(==) +OPENCV_HAL_IMPL_CMP_OP(!=) + +template +inline v_reg v_not_nan(const v_reg& a) +{ + typedef typename V_TypeTraits::int_type itype; + v_reg c; + for (int i = 0; i < n; i++) + c.s[i] = V_TypeTraits::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i])); + return c; +} +template +inline v_reg v_not_nan(const v_reg& a) +{ + typedef typename V_TypeTraits::int_type itype; + v_reg c; + for (int i = 0; i < n; i++) + c.s[i] = V_TypeTraits::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i])); + return c; +} + +#define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2) \ +template \ +inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + typedef _Tp2 rtype; \ + v_reg c; \ + for( int i = 0; i < n; i++ ) \ + c.s[i] = cast_op(a.s[i] bin_op b.s[i]); \ + return c; \ +} + +OPENCV_HAL_IMPL_ARITHM_OP(v_add_wrap, +, (_Tp), _Tp) +OPENCV_HAL_IMPL_ARITHM_OP(v_sub_wrap, -, (_Tp), _Tp) +OPENCV_HAL_IMPL_ARITHM_OP(v_mul_wrap, *, (_Tp), _Tp) + +template inline T _absdiff(T a, T b) +{ + return a > b ? a - b : b - a; +} + +template +inline v_reg::abs_type, n> v_absdiff(const v_reg<_Tp, n>& a, const v_reg<_Tp, n> & b) +{ + typedef typename V_TypeTraits<_Tp>::abs_type rtype; + v_reg c; + const rtype mask = (rtype)(std::numeric_limits<_Tp>::is_signed ? (1 << (sizeof(rtype)*8 - 1)) : 0); + for( int i = 0; i < n; i++ ) + { + rtype ua = a.s[i] ^ mask; + rtype ub = b.s[i] ^ mask; + c.s[i] = _absdiff(ua, ub); + } + return c; +} + +inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b) +{ + v_float32x4 c; + for( int i = 0; i < c.nlanes; i++ ) + c.s[i] = _absdiff(a.s[i], b.s[i]); + return c; +} + +inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b) +{ + v_float64x2 c; + for( int i = 0; i < c.nlanes; i++ ) + c.s[i] = _absdiff(a.s[i], b.s[i]); + return c; +} + +template +inline v_reg<_Tp, n> v_absdiffs(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++) + c.s[i] = saturate_cast<_Tp>(std::abs(a.s[i] - b.s[i])); + return c; +} + +template +inline v_reg<_Tp, n> v_invsqrt(const v_reg<_Tp, n>& a) +{ + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = 1.f/std::sqrt(a.s[i]); + return c; +} + +template +inline v_reg<_Tp, n> v_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = std::sqrt(a.s[i]*a.s[i] + b.s[i]*b.s[i]); + return c; +} + +template +inline v_reg<_Tp, n> v_sqr_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = a.s[i]*a.s[i] + b.s[i]*b.s[i]; + return c; +} + +template +inline v_reg<_Tp, n> v_fma(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + const v_reg<_Tp, n>& c) +{ + v_reg<_Tp, n> d; + for( int i = 0; i < n; i++ ) + d.s[i] = a.s[i]*b.s[i] + c.s[i]; + return d; +} + +template +inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + const v_reg<_Tp, n>& c) +{ + return v_fma(a, b, c); +} + +template inline v_reg::w_type, n/2> + v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + typedef typename V_TypeTraits<_Tp>::w_type w_type; + v_reg c; + for( int i = 0; i < (n/2); i++ ) + c.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1]; + return c; +} + +template inline v_reg::w_type, n/2> + v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, const v_reg::w_type, n / 2>& c) +{ + typedef typename V_TypeTraits<_Tp>::w_type w_type; + v_reg s; + for( int i = 0; i < (n/2); i++ ) + s.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1] + c.s[i]; + return s; +} + +template inline v_reg::q_type, n/4> + v_dotprod_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + typedef typename V_TypeTraits<_Tp>::q_type q_type; + v_reg s; + for( int i = 0; i < (n/4); i++ ) + s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] + + (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3]; + return s; +} + +template inline v_reg::q_type, n/4> + v_dotprod_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + const v_reg::q_type, n / 4>& c) +{ + typedef typename V_TypeTraits<_Tp>::q_type q_type; + v_reg s; + for( int i = 0; i < (n/4); i++ ) + s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] + + (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3] + c.s[i]; + return s; +} + +template inline void v_mul_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + v_reg::w_type, n/2>& c, + v_reg::w_type, n/2>& d) +{ + typedef typename V_TypeTraits<_Tp>::w_type w_type; + for( int i = 0; i < (n/2); i++ ) + { + c.s[i] = (w_type)a.s[i]*b.s[i]; + d.s[i] = (w_type)a.s[i+(n/2)]*b.s[i+(n/2)]; + } +} + +template inline v_reg<_Tp, n> v_mul_hi(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + typedef typename V_TypeTraits<_Tp>::w_type w_type; + v_reg<_Tp, n> c; + for (int i = 0; i < n; i++) + c.s[i] = (_Tp)(((w_type)a.s[i] * b.s[i]) >> sizeof(_Tp)*8); + return c; +} + +template inline void v_hsum(const v_reg<_Tp, n>& a, + v_reg::w_type, n/2>& c) +{ + typedef typename V_TypeTraits<_Tp>::w_type w_type; + for( int i = 0; i < (n/2); i++ ) + { + c.s[i] = (w_type)a.s[i*2] + a.s[i*2+1]; + } +} + +#define OPENCV_HAL_IMPL_SHIFT_OP(shift_op) \ +template inline v_reg<_Tp, n> operator shift_op(const v_reg<_Tp, n>& a, int imm) \ +{ \ + v_reg<_Tp, n> c; \ + for( int i = 0; i < n; i++ ) \ + c.s[i] = (_Tp)(a.s[i] shift_op imm); \ + return c; \ +} + +OPENCV_HAL_IMPL_SHIFT_OP(<< ) +OPENCV_HAL_IMPL_SHIFT_OP(>> ) + +#define OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(suffix,opA,opB) \ +template inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a) \ +{ \ + v_reg<_Tp, n> b; \ + for (int i = 0; i < n; i++) \ + { \ + int sIndex = i opA imm; \ + if (0 <= sIndex && sIndex < n) \ + { \ + b.s[i] = a.s[sIndex]; \ + } \ + else \ + { \ + b.s[i] = 0; \ + } \ + } \ + return b; \ +} \ +template inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + v_reg<_Tp, n> c; \ + for (int i = 0; i < n; i++) \ + { \ + int aIndex = i opA imm; \ + int bIndex = i opA imm opB n; \ + if (0 <= bIndex && bIndex < n) \ + { \ + c.s[i] = b.s[bIndex]; \ + } \ + else if (0 <= aIndex && aIndex < n) \ + { \ + c.s[i] = a.s[aIndex]; \ + } \ + else \ + { \ + c.s[i] = 0; \ + } \ + } \ + return c; \ +} + +OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(left, -, +) +OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(right, +, -) + +template inline typename V_TypeTraits<_Tp>::sum_type v_reduce_sum(const v_reg<_Tp, n>& a) +{ + typename V_TypeTraits<_Tp>::sum_type c = a.s[0]; + for( int i = 1; i < n; i++ ) + c += a.s[i]; + return c; +} + +inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d) +{ + v_float32x4 r; + r.s[0] = a.s[0] + a.s[1] + a.s[2] + a.s[3]; + r.s[1] = b.s[0] + b.s[1] + b.s[2] + b.s[3]; + r.s[2] = c.s[0] + c.s[1] + c.s[2] + c.s[3]; + r.s[3] = d.s[0] + d.s[1] + d.s[2] + d.s[3]; + return r; +} + +template inline typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type v_reduce_sad(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type c = _absdiff(a.s[0], b.s[0]); + for (int i = 1; i < n; i++) + c += _absdiff(a.s[i], b.s[i]); + return c; +} + +template inline int v_signmask(const v_reg<_Tp, n>& a) +{ + int mask = 0; + for( int i = 0; i < n; i++ ) + mask |= (V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0) << i; + return mask; +} + +template inline bool v_check_all(const v_reg<_Tp, n>& a) +{ + for( int i = 0; i < n; i++ ) + if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) >= 0 ) + return false; + return true; +} + +template inline bool v_check_any(const v_reg<_Tp, n>& a) +{ + for( int i = 0; i < n; i++ ) + if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0 ) + return true; + return false; +} + +template inline v_reg<_Tp, n> v_select(const v_reg<_Tp, n>& mask, + const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + typedef V_TypeTraits<_Tp> Traits; + typedef typename Traits::int_type int_type; + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++ ) + { + int_type m = Traits::reinterpret_int(mask.s[i]); + CV_DbgAssert(m == 0 || m == (~(int_type)0)); // restrict mask values: 0 or 0xff/0xffff/etc + c.s[i] = m ? a.s[i] : b.s[i]; + } + return c; +} + +template inline void v_expand(const v_reg<_Tp, n>& a, + v_reg::w_type, n/2>& b0, + v_reg::w_type, n/2>& b1) +{ + for( int i = 0; i < (n/2); i++ ) + { + b0.s[i] = a.s[i]; + b1.s[i] = a.s[i+(n/2)]; + } +} + +template +inline v_reg::w_type, n/2> +v_expand_low(const v_reg<_Tp, n>& a) +{ + v_reg::w_type, n/2> b; + for( int i = 0; i < (n/2); i++ ) + b.s[i] = a.s[i]; + return b; +} + +template +inline v_reg::w_type, n/2> +v_expand_high(const v_reg<_Tp, n>& a) +{ + v_reg::w_type, n/2> b; + for( int i = 0; i < (n/2); i++ ) + b.s[i] = a.s[i+(n/2)]; + return b; +} + +template inline v_reg::int_type, n> + v_reinterpret_as_int(const v_reg<_Tp, n>& a) +{ + v_reg::int_type, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = V_TypeTraits<_Tp>::reinterpret_int(a.s[i]); + return c; +} + +template inline v_reg::uint_type, n> + v_reinterpret_as_uint(const v_reg<_Tp, n>& a) +{ + v_reg::uint_type, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = V_TypeTraits<_Tp>::reinterpret_uint(a.s[i]); + return c; +} + +template inline void v_zip( const v_reg<_Tp, n>& a0, const v_reg<_Tp, n>& a1, + v_reg<_Tp, n>& b0, v_reg<_Tp, n>& b1 ) +{ + int i; + for( i = 0; i < n/2; i++ ) + { + b0.s[i*2] = a0.s[i]; + b0.s[i*2+1] = a1.s[i]; + } + for( ; i < n; i++ ) + { + b1.s[i*2-n] = a0.s[i]; + b1.s[i*2-n+1] = a1.s[i]; + } +} + +template +inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load(const _Tp* ptr) +{ + return v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128>(ptr); +} + +template +inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_aligned(const _Tp* ptr) +{ + return v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128>(ptr); +} + +template +inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_low(const _Tp* ptr) +{ + v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; + for( int i = 0; i < c.nlanes/2; i++ ) + { + c.s[i] = ptr[i]; + } + return c; +} + +template +inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_halves(const _Tp* loptr, const _Tp* hiptr) +{ + v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; + for( int i = 0; i < c.nlanes/2; i++ ) + { + c.s[i] = loptr[i]; + c.s[i+c.nlanes/2] = hiptr[i]; + } + return c; +} + +template +inline v_reg::w_type, V_TypeTraits<_Tp>::nlanes128 / 2> +v_load_expand(const _Tp* ptr) +{ + typedef typename V_TypeTraits<_Tp>::w_type w_type; + v_reg::nlanes128> c; + for( int i = 0; i < c.nlanes; i++ ) + { + c.s[i] = ptr[i]; + } + return c; +} + +template +inline v_reg::q_type, V_TypeTraits<_Tp>::nlanes128 / 4> +v_load_expand_q(const _Tp* ptr) +{ + typedef typename V_TypeTraits<_Tp>::q_type q_type; + v_reg::nlanes128> c; + for( int i = 0; i < c.nlanes; i++ ) + { + c.s[i] = ptr[i]; + } + return c; +} + +template inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, + v_reg<_Tp, n>& b) +{ + int i, i2; + for( i = i2 = 0; i < n; i++, i2 += 2 ) + { + a.s[i] = ptr[i2]; + b.s[i] = ptr[i2+1]; + } +} + +template inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, + v_reg<_Tp, n>& b, v_reg<_Tp, n>& c) +{ + int i, i3; + for( i = i3 = 0; i < n; i++, i3 += 3 ) + { + a.s[i] = ptr[i3]; + b.s[i] = ptr[i3+1]; + c.s[i] = ptr[i3+2]; + } +} + +template +inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, + v_reg<_Tp, n>& b, v_reg<_Tp, n>& c, + v_reg<_Tp, n>& d) +{ + int i, i4; + for( i = i4 = 0; i < n; i++, i4 += 4 ) + { + a.s[i] = ptr[i4]; + b.s[i] = ptr[i4+1]; + c.s[i] = ptr[i4+2]; + d.s[i] = ptr[i4+3]; + } +} + +template +inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, + const v_reg<_Tp, n>& b, + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) +{ + int i, i2; + for( i = i2 = 0; i < n; i++, i2 += 2 ) + { + ptr[i2] = a.s[i]; + ptr[i2+1] = b.s[i]; + } +} + +template +inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, + const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c, + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) +{ + int i, i3; + for( i = i3 = 0; i < n; i++, i3 += 3 ) + { + ptr[i3] = a.s[i]; + ptr[i3+1] = b.s[i]; + ptr[i3+2] = c.s[i]; + } +} + +template inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, + const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c, + const v_reg<_Tp, n>& d, + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) +{ + int i, i4; + for( i = i4 = 0; i < n; i++, i4 += 4 ) + { + ptr[i4] = a.s[i]; + ptr[i4+1] = b.s[i]; + ptr[i4+2] = c.s[i]; + ptr[i4+3] = d.s[i]; + } +} + +template +inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + for( int i = 0; i < n; i++ ) + ptr[i] = a.s[i]; +} + +template +inline void v_store_low(_Tp* ptr, const v_reg<_Tp, n>& a) +{ + for( int i = 0; i < (n/2); i++ ) + ptr[i] = a.s[i]; +} + +template +inline void v_store_high(_Tp* ptr, const v_reg<_Tp, n>& a) +{ + for( int i = 0; i < (n/2); i++ ) + ptr[i] = a.s[i+(n/2)]; +} + +template +inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a) +{ + for( int i = 0; i < n; i++ ) + ptr[i] = a.s[i]; +} + +template +inline void v_store_aligned_nocache(_Tp* ptr, const v_reg<_Tp, n>& a) +{ + for( int i = 0; i < n; i++ ) + ptr[i] = a.s[i]; +} + +template +inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/) +{ + for( int i = 0; i < n; i++ ) + ptr[i] = a.s[i]; +} + +template +inline v_reg<_Tp, n> v_combine_low(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + v_reg<_Tp, n> c; + for( int i = 0; i < (n/2); i++ ) + { + c.s[i] = a.s[i]; + c.s[i+(n/2)] = b.s[i]; + } + return c; +} + +template +inline v_reg<_Tp, n> v_combine_high(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + v_reg<_Tp, n> c; + for( int i = 0; i < (n/2); i++ ) + { + c.s[i] = a.s[i+(n/2)]; + c.s[i+(n/2)] = b.s[i+(n/2)]; + } + return c; +} + +template +inline void v_recombine(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + v_reg<_Tp, n>& low, v_reg<_Tp, n>& high) +{ + for( int i = 0; i < (n/2); i++ ) + { + low.s[i] = a.s[i]; + low.s[i+(n/2)] = b.s[i]; + high.s[i] = a.s[i+(n/2)]; + high.s[i+(n/2)] = b.s[i+(n/2)]; + } +} + +template +inline v_reg<_Tp, n> v_extract(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + v_reg<_Tp, n> r; + const int shift = n - s; + int i = 0; + for (; i < shift; ++i) + r.s[i] = a.s[i+s]; + for (; i < n; ++i) + r.s[i] = b.s[i-shift]; + return r; +} + +template inline v_reg v_round(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = cvRound(a.s[i]); + return c; +} + +template inline v_reg v_round(const v_reg& a, const v_reg& b) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = cvRound(a.s[i]); + c.s[i+n] = cvRound(b.s[i]); + } + return c; +} + +template inline v_reg v_floor(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = cvFloor(a.s[i]); + return c; +} + +template inline v_reg v_ceil(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = cvCeil(a.s[i]); + return c; +} + +template inline v_reg v_trunc(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = (int)(a.s[i]); + return c; +} + +template inline v_reg v_round(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = cvRound(a.s[i]); + c.s[i+n] = 0; + } + return c; +} + +template inline v_reg v_floor(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = cvFloor(a.s[i]); + c.s[i+n] = 0; + } + return c; +} + +template inline v_reg v_ceil(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = cvCeil(a.s[i]); + c.s[i+n] = 0; + } + return c; +} + +template inline v_reg v_trunc(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = (int)(a.s[i]); + c.s[i+n] = 0; + } + return c; +} + +template inline v_reg v_cvt_f32(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = (float)a.s[i]; + return c; +} + +template inline v_reg v_cvt_f32(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = (float)a.s[i]; + c.s[i+n] = 0; + } + return c; +} + +template inline v_reg v_cvt_f32(const v_reg& a, const v_reg& b) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = (float)a.s[i]; + c.s[i+n] = (float)b.s[i]; + } + return c; +} + +inline v_float64x2 v_cvt_f64(const v_int32x4& a) +{ + v_float64x2 c; + for( int i = 0; i < 2; i++ ) + c.s[i] = (double)a.s[i]; + return c; +} + +inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) +{ + v_float64x2 c; + for( int i = 0; i < 2; i++ ) + c.s[i] = (double)a.s[i+2]; + return c; +} + +inline v_float64x2 v_cvt_f64(const v_float32x4& a) +{ + v_float64x2 c; + for( int i = 0; i < 2; i++ ) + c.s[i] = (double)a.s[i]; + return c; +} + +inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) +{ + v_float64x2 c; + for( int i = 0; i < 2; i++ ) + c.s[i] = (double)a.s[i+2]; + return c; +} + +inline v_float64x2 v_cvt_f64(const v_int64x2& a) +{ + v_float64x2 c; + for( int i = 0; i < 2; i++ ) + c.s[i] = (double)a.s[i]; + return c; +} + +template inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut(const _Tp* tab, const int* idx) +{ + v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; + for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++) + c.s[i] = tab[idx[i]]; + return c; +} +template inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut_pairs(const _Tp* tab, const int* idx) +{ + v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; + for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++) + c.s[i] = tab[idx[i / 2] + i % 2]; + return c; +} +template inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut_quads(const _Tp* tab, const int* idx) +{ + v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; + for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++) + c.s[i] = tab[idx[i / 4] + i % 4]; + return c; +} + +template inline v_reg v_lut(const int* tab, const v_reg& idx) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = tab[idx.s[i]]; + return c; +} + +template inline v_reg v_lut(const unsigned* tab, const v_reg& idx) +{ + v_reg c; + for (int i = 0; i < n; i++) + c.s[i] = tab[idx.s[i]]; + return c; +} + +template inline v_reg v_lut(const float* tab, const v_reg& idx) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = tab[idx.s[i]]; + return c; +} + +template inline v_reg v_lut(const double* tab, const v_reg& idx) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = tab[idx.s[i]]; + return c; +} + +template inline void v_lut_deinterleave(const float* tab, const v_reg& idx, + v_reg& x, v_reg& y) +{ + for( int i = 0; i < n; i++ ) + { + int j = idx.s[i]; + x.s[i] = tab[j]; + y.s[i] = tab[j+1]; + } +} + +template inline void v_lut_deinterleave(const double* tab, const v_reg& idx, + v_reg& x, v_reg& y) +{ + for( int i = 0; i < n; i++ ) + { + int j = idx.s[i]; + x.s[i] = tab[j]; + y.s[i] = tab[j+1]; + } +} + +template inline v_reg<_Tp, n> v_interleave_pairs(const v_reg<_Tp, n>& vec) +{ + v_reg<_Tp, n> c; + for (int i = 0; i < n/4; i++) + { + c.s[4*i ] = vec.s[4*i ]; + c.s[4*i+1] = vec.s[4*i+2]; + c.s[4*i+2] = vec.s[4*i+1]; + c.s[4*i+3] = vec.s[4*i+3]; + } + return c; +} + +template inline v_reg<_Tp, n> v_interleave_quads(const v_reg<_Tp, n>& vec) +{ + v_reg<_Tp, n> c; + for (int i = 0; i < n/8; i++) + { + c.s[8*i ] = vec.s[8*i ]; + c.s[8*i+1] = vec.s[8*i+4]; + c.s[8*i+2] = vec.s[8*i+1]; + c.s[8*i+3] = vec.s[8*i+5]; + c.s[8*i+4] = vec.s[8*i+2]; + c.s[8*i+5] = vec.s[8*i+6]; + c.s[8*i+6] = vec.s[8*i+3]; + c.s[8*i+7] = vec.s[8*i+7]; + } + return c; +} + +template inline v_reg<_Tp, n> v_pack_triplets(const v_reg<_Tp, n>& vec) +{ + v_reg<_Tp, n> c; + for (int i = 0; i < n/4; i++) + { + c.s[3*i ] = vec.s[4*i ]; + c.s[3*i+1] = vec.s[4*i+1]; + c.s[3*i+2] = vec.s[4*i+2]; + } + return c; +} + +template +inline void v_transpose4x4( v_reg<_Tp, 4>& a0, const v_reg<_Tp, 4>& a1, + const v_reg<_Tp, 4>& a2, const v_reg<_Tp, 4>& a3, + v_reg<_Tp, 4>& b0, v_reg<_Tp, 4>& b1, + v_reg<_Tp, 4>& b2, v_reg<_Tp, 4>& b3 ) +{ + b0 = v_reg<_Tp, 4>(a0.s[0], a1.s[0], a2.s[0], a3.s[0]); + b1 = v_reg<_Tp, 4>(a0.s[1], a1.s[1], a2.s[1], a3.s[1]); + b2 = v_reg<_Tp, 4>(a0.s[2], a1.s[2], a2.s[2], a3.s[2]); + b3 = v_reg<_Tp, 4>(a0.s[3], a1.s[3], a2.s[3], a3.s[3]); +} + +#define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, _Tp, suffix) \ +inline _Tpvec v_setzero_##suffix() { return _Tpvec::zero(); } + +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x16, uchar, u8) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x16, schar, s8) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x8, short, s16) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x4, int, s32) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x4, float, f32) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x2, double, f64) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x2, int64, s64) + +#define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, suffix) \ +inline _Tpvec v_setall_##suffix(_Tp val) { return _Tpvec::all(val); } + +OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x16, schar, s8) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x8, short, s16) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x4, int, s32) +OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x4, float, f32) +OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x2, double, f64) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x2, int64, s64) + +#define OPENCV_HAL_IMPL_C_REINTERPRET(_Tpvec, _Tp, suffix) \ +template inline _Tpvec \ + v_reinterpret_as_##suffix(const v_reg<_Tp0, n0>& a) \ +{ return a.template reinterpret_as<_Tp, _Tpvec::nlanes>(); } + +OPENCV_HAL_IMPL_C_REINTERPRET(v_uint8x16, uchar, u8) +OPENCV_HAL_IMPL_C_REINTERPRET(v_int8x16, schar, s8) +OPENCV_HAL_IMPL_C_REINTERPRET(v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_C_REINTERPRET(v_int16x8, short, s16) +OPENCV_HAL_IMPL_C_REINTERPRET(v_uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_C_REINTERPRET(v_int32x4, int, s32) +OPENCV_HAL_IMPL_C_REINTERPRET(v_float32x4, float, f32) +OPENCV_HAL_IMPL_C_REINTERPRET(v_float64x2, double, f64) +OPENCV_HAL_IMPL_C_REINTERPRET(v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_C_REINTERPRET(v_int64x2, int64, s64) + +#define OPENCV_HAL_IMPL_C_SHIFTL(_Tpvec, _Tp) \ +template inline _Tpvec v_shl(const _Tpvec& a) \ +{ return a << n; } + +OPENCV_HAL_IMPL_C_SHIFTL(v_uint16x8, ushort) +OPENCV_HAL_IMPL_C_SHIFTL(v_int16x8, short) +OPENCV_HAL_IMPL_C_SHIFTL(v_uint32x4, unsigned) +OPENCV_HAL_IMPL_C_SHIFTL(v_int32x4, int) +OPENCV_HAL_IMPL_C_SHIFTL(v_uint64x2, uint64) +OPENCV_HAL_IMPL_C_SHIFTL(v_int64x2, int64) + +#define OPENCV_HAL_IMPL_C_SHIFTR(_Tpvec, _Tp) \ +template inline _Tpvec v_shr(const _Tpvec& a) \ +{ return a >> n; } + +OPENCV_HAL_IMPL_C_SHIFTR(v_uint16x8, ushort) +OPENCV_HAL_IMPL_C_SHIFTR(v_int16x8, short) +OPENCV_HAL_IMPL_C_SHIFTR(v_uint32x4, unsigned) +OPENCV_HAL_IMPL_C_SHIFTR(v_int32x4, int) +OPENCV_HAL_IMPL_C_SHIFTR(v_uint64x2, uint64) +OPENCV_HAL_IMPL_C_SHIFTR(v_int64x2, int64) + +#define OPENCV_HAL_IMPL_C_RSHIFTR(_Tpvec, _Tp) \ +template inline _Tpvec v_rshr(const _Tpvec& a) \ +{ \ + _Tpvec c; \ + for( int i = 0; i < _Tpvec::nlanes; i++ ) \ + c.s[i] = (_Tp)((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ + return c; \ +} + +OPENCV_HAL_IMPL_C_RSHIFTR(v_uint16x8, ushort) +OPENCV_HAL_IMPL_C_RSHIFTR(v_int16x8, short) +OPENCV_HAL_IMPL_C_RSHIFTR(v_uint32x4, unsigned) +OPENCV_HAL_IMPL_C_RSHIFTR(v_int32x4, int) +OPENCV_HAL_IMPL_C_RSHIFTR(v_uint64x2, uint64) +OPENCV_HAL_IMPL_C_RSHIFTR(v_int64x2, int64) + +#define OPENCV_HAL_IMPL_C_PACK(_Tpvec, _Tpnvec, _Tpn, pack_suffix, cast) \ +inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ +{ \ + _Tpnvec c; \ + for( int i = 0; i < _Tpvec::nlanes; i++ ) \ + { \ + c.s[i] = cast<_Tpn>(a.s[i]); \ + c.s[i+_Tpvec::nlanes] = cast<_Tpn>(b.s[i]); \ + } \ + return c; \ +} + +OPENCV_HAL_IMPL_C_PACK(v_uint16x8, v_uint8x16, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_int8x16, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(v_uint32x4, v_uint16x8, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_int16x8, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(v_uint64x2, v_uint32x4, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK(v_int64x2, v_int32x4, int, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_uint8x16, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_uint16x8, ushort, pack_u, saturate_cast) + +#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ +template inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ +{ \ + _Tpnvec c; \ + for( int i = 0; i < _Tpvec::nlanes; i++ ) \ + { \ + c.s[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ + c.s[i+_Tpvec::nlanes] = cast<_Tpn>((b.s[i] + ((_Tp)1 << (n - 1))) >> n); \ + } \ + return c; \ +} + +OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_int16x8, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_int64x2, int64, v_int32x4, int, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) + +#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ +inline void v_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ +{ \ + for( int i = 0; i < _Tpvec::nlanes; i++ ) \ + ptr[i] = cast<_Tpn>(a.s[i]); \ +} + +OPENCV_HAL_IMPL_C_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) + +#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ +template inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ +{ \ + for( int i = 0; i < _Tpvec::nlanes; i++ ) \ + ptr[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ +} + +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) + +template +inline void _pack_b(_Tpm* mptr, const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + for (int i = 0; i < n; ++i) + { + mptr[i] = (_Tpm)a.s[i]; + mptr[i + n] = (_Tpm)b.s[i]; + } +} + +inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) +{ + v_uint8x16 mask; + _pack_b(mask.s, a, b); + return mask; +} + +inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, const v_uint32x4& d) +{ + v_uint8x16 mask; + _pack_b(mask.s, a, b); + _pack_b(mask.s + 8, c, d); + return mask; +} + +inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, + const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f, + const v_uint64x2& g, const v_uint64x2& h) +{ + v_uint8x16 mask; + _pack_b(mask.s, a, b); + _pack_b(mask.s + 4, c, d); + _pack_b(mask.s + 8, e, f); + _pack_b(mask.s + 12, g, h); + return mask; +} + +inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& m3) +{ + return v_float32x4(v.s[0]*m0.s[0] + v.s[1]*m1.s[0] + v.s[2]*m2.s[0] + v.s[3]*m3.s[0], + v.s[0]*m0.s[1] + v.s[1]*m1.s[1] + v.s[2]*m2.s[1] + v.s[3]*m3.s[1], + v.s[0]*m0.s[2] + v.s[1]*m1.s[2] + v.s[2]*m2.s[2] + v.s[3]*m3.s[2], + v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + v.s[3]*m3.s[3]); +} + +inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& m3) +{ + return v_float32x4(v.s[0]*m0.s[0] + v.s[1]*m1.s[0] + v.s[2]*m2.s[0] + m3.s[0], + v.s[0]*m0.s[1] + v.s[1]*m1.s[1] + v.s[2]*m2.s[1] + m3.s[1], + v.s[0]*m0.s[2] + v.s[1]*m1.s[2] + v.s[2]*m2.s[2] + m3.s[2], + v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + m3.s[3]); +} + +inline v_reg::nlanes128> +v_load_expand(const float16_t* ptr) +{ + v_reg::nlanes128> v; + for( int i = 0; i < v.nlanes; i++ ) + { + v.s[i] = ptr[i]; + } + return v; +} + +inline void +v_pack_store(float16_t* ptr, const v_reg::nlanes128>& v) +{ + for( int i = 0; i < v.nlanes; i++ ) + { + ptr[i] = float16_t(v.s[i]); + } +} + +inline void v_cleanup() {} +} // namespace fallback + +static v128_t wasm_unpacklo_i8x16(v128_t a, v128_t b) { + return wasm_v8x16_shuffle(a, b, 0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23); +} + +static v128_t wasm_unpacklo_i16x8(v128_t a, v128_t b) { + return wasm_v8x16_shuffle(a, b, 0,1,16,17,2,3,18,19,4,5,20,21,6,7,22,23); +} + +static v128_t wasm_unpacklo_i32x4(v128_t a, v128_t b) { + return wasm_v8x16_shuffle(a, b, 0,1,2,3,16,17,18,19,4,5,6,7,20,21,22,23); +} + +static v128_t wasm_unpacklo_i64x2(v128_t a, v128_t b) { + return wasm_v8x16_shuffle(a, b, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23); +} + +static v128_t wasm_unpackhi_i8x16(v128_t a, v128_t b) { + return wasm_v8x16_shuffle(a, b, 8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31); +} + +static v128_t wasm_unpackhi_i16x8(v128_t a, v128_t b) { + return wasm_v8x16_shuffle(a, b, 8,9,24,25,10,11,26,27,12,13,28,29,14,15,30,31); +} + +static v128_t wasm_unpackhi_i32x4(v128_t a, v128_t b) { + return wasm_v8x16_shuffle(a, b, 8,9,10,11,24,25,26,27,12,13,14,15,28,29,30,31); +} + +static v128_t wasm_unpackhi_i64x2(v128_t a, v128_t b) { + return wasm_v8x16_shuffle(a, b, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31); +} + +/** Convert **/ +// 8 >> 16 +inline v128_t v128_cvtu8x16_i16x8(const v128_t& a) +{ + const v128_t z = wasm_i8x16_splat(0); + return wasm_unpacklo_i8x16(a, z); +} +inline v128_t v128_cvti8x16_i16x8(const v128_t& a) +{ return wasm_i16x8_shr(wasm_unpacklo_i8x16(a, a), 8); } +// 8 >> 32 +inline v128_t v128_cvtu8x16_i32x4(const v128_t& a) +{ + const v128_t z = wasm_i8x16_splat(0); + return wasm_unpacklo_i16x8(wasm_unpacklo_i8x16(a, z), z); +} +inline v128_t v128_cvti8x16_i32x4(const v128_t& a) +{ + v128_t r = wasm_unpacklo_i8x16(a, a); + r = wasm_unpacklo_i8x16(r, r); + return wasm_i32x4_shr(r, 24); +} +// 16 >> 32 +inline v128_t v128_cvtu16x8_i32x4(const v128_t& a) +{ + const v128_t z = wasm_i8x16_splat(0); + return wasm_unpacklo_i16x8(a, z); +} +inline v128_t v128_cvti16x8_i32x4(const v128_t& a) +{ return wasm_i32x4_shr(wasm_unpacklo_i16x8(a, a), 16); } +// 32 >> 64 +inline v128_t v128_cvtu32x4_i64x2(const v128_t& a) +{ + const v128_t z = wasm_i8x16_splat(0); + return wasm_unpacklo_i32x4(a, z); +} +inline v128_t v128_cvti32x4_i64x2(const v128_t& a) +{ return wasm_unpacklo_i32x4(a, wasm_i32x4_shr(a, 31)); } + +// 16 << 8 +inline v128_t v128_cvtu8x16_i16x8_high(const v128_t& a) +{ + const v128_t z = wasm_i8x16_splat(0); + return wasm_unpackhi_i8x16(a, z); +} +inline v128_t v128_cvti8x16_i16x8_high(const v128_t& a) +{ return wasm_i16x8_shr(wasm_unpackhi_i8x16(a, a), 8); } +// 32 << 16 +inline v128_t v128_cvtu16x8_i32x4_high(const v128_t& a) +{ + const v128_t z = wasm_i8x16_splat(0); + return wasm_unpackhi_i16x8(a, z); +} +inline v128_t v128_cvti16x8_i32x4_high(const v128_t& a) +{ return wasm_i32x4_shr(wasm_unpackhi_i16x8(a, a), 16); } +// 64 << 32 +inline v128_t v128_cvtu32x4_i64x2_high(const v128_t& a) +{ + const v128_t z = wasm_i8x16_splat(0); + return wasm_unpackhi_i32x4(a, z); +} +inline v128_t v128_cvti32x4_i64x2_high(const v128_t& a) +{ return wasm_unpackhi_i32x4(a, wasm_i32x4_shr(a, 31)); } + +#define OPENCV_HAL_IMPL_WASM_INITVEC(_Tpvec, _Tp, suffix, zsuffix, _Tps) \ +inline _Tpvec v_setzero_##suffix() { return _Tpvec(wasm_##zsuffix##_splat((_Tps)0)); } \ +inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(wasm_##zsuffix##_splat((_Tps)v)); } \ +template inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0& a) \ +{ return _Tpvec(a.val); } + +OPENCV_HAL_IMPL_WASM_INITVEC(v_uint8x16, uchar, u8, i8x16, schar) +OPENCV_HAL_IMPL_WASM_INITVEC(v_int8x16, schar, s8, i8x16, schar) +OPENCV_HAL_IMPL_WASM_INITVEC(v_uint16x8, ushort, u16, i16x8, short) +OPENCV_HAL_IMPL_WASM_INITVEC(v_int16x8, short, s16, i16x8, short) +OPENCV_HAL_IMPL_WASM_INITVEC(v_uint32x4, unsigned, u32, i32x4, int) +OPENCV_HAL_IMPL_WASM_INITVEC(v_int32x4, int, s32, i32x4, int) +OPENCV_HAL_IMPL_WASM_INITVEC(v_float32x4, float, f32, f32x4, float) + +#ifdef __wasm_unimplemented_simd128__ +OPENCV_HAL_IMPL_WASM_INITVEC(v_uint64x2, uint64, u64, i64x2, int64) +OPENCV_HAL_IMPL_WASM_INITVEC(v_int64x2, int64, s64, i64x2, int64) +OPENCV_HAL_IMPL_WASM_INITVEC(v_float64x2, double, f64, f64x2, double) +#else +#define OPENCV_HAL_IMPL_FALLBACK_INITVEC(_Tpvec, _Tp, suffix, _Tps) \ +inline _Tpvec v_setzero_##suffix() { return _Tpvec((_Tps)0, (_Tps)0); } \ +inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec((_Tps)v, (_Tps)v); } \ +template inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0& a) \ +{ return _Tpvec(a.val); } + +OPENCV_HAL_IMPL_FALLBACK_INITVEC(v_uint64x2, uint64, u64, int64) +OPENCV_HAL_IMPL_FALLBACK_INITVEC(v_int64x2, int64, s64, int64) +OPENCV_HAL_IMPL_FALLBACK_INITVEC(v_float64x2, double, f64, double) +#endif + +//////////////// PACK /////////////// +inline v_uint8x16 v_pack(const v_uint16x8& a, const v_uint16x8& b) +{ + v128_t maxval = wasm_i16x8_splat(255); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u16x8_gt(a.val, maxval)); + v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u16x8_gt(b.val, maxval)); + return v_uint8x16(wasm_v8x16_shuffle(a1, b1, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); +} +inline v_int8x16 v_pack(const v_int16x8& a, const v_int16x8& b) +{ + v128_t maxval = wasm_i16x8_splat(127); + v128_t minval = wasm_i16x8_splat(-128); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i16x8_gt(a.val, maxval)); + v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i16x8_gt(b.val, maxval)); + v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval)); + v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i16x8_lt(b1, minval)); + return v_int8x16(wasm_v8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); +} +inline v_uint16x8 v_pack(const v_uint32x4& a, const v_uint32x4& b) +{ + v128_t maxval = wasm_i32x4_splat(65535); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u32x4_gt(a.val, maxval)); + v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u32x4_gt(b.val, maxval)); + return v_uint16x8(wasm_v8x16_shuffle(a1, b1, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); +} +inline v_int16x8 v_pack(const v_int32x4& a, const v_int32x4& b) +{ + v128_t maxval = wasm_i32x4_splat(32767); + v128_t minval = wasm_i32x4_splat(-32768); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i32x4_gt(a.val, maxval)); + v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i32x4_gt(b.val, maxval)); + v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval)); + v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i32x4_lt(b1, minval)); + return v_int16x8(wasm_v8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); +} +inline v_uint32x4 v_pack(const v_uint64x2& a, const v_uint64x2& b) +{ + return v_uint32x4(wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27)); +} +inline v_int32x4 v_pack(const v_int64x2& a, const v_int64x2& b) +{ + return v_int32x4(wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27)); +} +inline v_uint8x16 v_pack_u(const v_int16x8& a, const v_int16x8& b) +{ + v128_t maxval = wasm_i16x8_splat(255); + v128_t minval = wasm_i16x8_splat(0); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i16x8_gt(a.val, maxval)); + v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i16x8_gt(b.val, maxval)); + v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval)); + v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i16x8_lt(b1, minval)); + return v_uint8x16(wasm_v8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); +} +inline v_uint16x8 v_pack_u(const v_int32x4& a, const v_int32x4& b) +{ + v128_t maxval = wasm_i32x4_splat(65535); + v128_t minval = wasm_i32x4_splat(0); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i32x4_gt(a.val, maxval)); + v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i32x4_gt(b.val, maxval)); + v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval)); + v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i32x4_lt(b1, minval)); + return v_uint16x8(wasm_v8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); +} + +template +inline v_uint8x16 v_rshr_pack(const v_uint16x8& a, const v_uint16x8& b) +{ + v128_t delta = wasm_i16x8_splat(((short)1 << (n-1))); + v128_t a1 = wasm_u16x8_shr(wasm_i16x8_add(a.val, delta), n); + v128_t b1 = wasm_u16x8_shr(wasm_i16x8_add(b.val, delta), n); + v128_t maxval = wasm_i16x8_splat(255); + v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u16x8_gt(a1, maxval)); + v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_u16x8_gt(b1, maxval)); + return v_uint8x16(wasm_v8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); +} +template +inline v_int8x16 v_rshr_pack(const v_int16x8& a, const v_int16x8& b) +{ + v128_t delta = wasm_i16x8_splat(((short)1 << (n-1))); + v128_t a1 = wasm_i16x8_shr(wasm_i16x8_add(a.val, delta), n); + v128_t b1 = wasm_i16x8_shr(wasm_i16x8_add(b.val, delta), n); + v128_t maxval = wasm_i16x8_splat(127); + v128_t minval = wasm_i16x8_splat(-128); + v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i16x8_gt(a1, maxval)); + v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i16x8_gt(b1, maxval)); + v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval)); + v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i16x8_lt(b1, minval)); + return v_int8x16(wasm_v8x16_shuffle(a3, b3, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); +} +template +inline v_uint16x8 v_rshr_pack(const v_uint32x4& a, const v_uint32x4& b) +{ + v128_t delta = wasm_i32x4_splat(((int)1 << (n-1))); + v128_t a1 = wasm_u32x4_shr(wasm_i32x4_add(a.val, delta), n); + v128_t b1 = wasm_u32x4_shr(wasm_i32x4_add(b.val, delta), n); + v128_t maxval = wasm_i32x4_splat(65535); + v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u32x4_gt(a1, maxval)); + v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_u32x4_gt(b1, maxval)); + return v_uint16x8(wasm_v8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); +} +template +inline v_int16x8 v_rshr_pack(const v_int32x4& a, const v_int32x4& b) +{ + v128_t delta = wasm_i32x4_splat(((int)1 << (n-1))); + v128_t a1 = wasm_i32x4_shr(wasm_i32x4_add(a.val, delta), n); + v128_t b1 = wasm_i32x4_shr(wasm_i32x4_add(b.val, delta), n); + v128_t maxval = wasm_i32x4_splat(32767); + v128_t minval = wasm_i16x8_splat(-32768); + v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i32x4_gt(a1, maxval)); + v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i32x4_gt(b1, maxval)); + v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval)); + v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i32x4_lt(b1, minval)); + return v_int16x8(wasm_v8x16_shuffle(a3, b3, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); +} +template +inline v_uint32x4 v_rshr_pack(const v_uint64x2& a, const v_uint64x2& b) +{ +#ifdef __wasm_unimplemented_simd128__ + v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1))); + v128_t a1 = wasm_u64x2_shr(wasm_i64x2_add(a.val, delta), n); + v128_t b1 = wasm_u64x2_shr(wasm_i64x2_add(b.val, delta), n); + return v_uint32x4(wasm_v8x16_shuffle(a1, b1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27)); +#else + fallback::v_uint64x2 a_(a), b_(b); + return fallback::v_rshr_pack(a_, b_); +#endif +} +template +inline v_int32x4 v_rshr_pack(const v_int64x2& a, const v_int64x2& b) +{ +#ifdef __wasm_unimplemented_simd128__ + v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1))); + v128_t a1 = wasm_i64x2_shr(wasm_i64x2_add(a.val, delta), n); + v128_t b1 = wasm_i64x2_shr(wasm_i64x2_add(b.val, delta), n); + return v_int32x4(wasm_v8x16_shuffle(a1, b1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27)); +#else + fallback::v_int64x2 a_(a), b_(b); + return fallback::v_rshr_pack(a_, b_); +#endif +} +template +inline v_uint8x16 v_rshr_pack_u(const v_int16x8& a, const v_int16x8& b) +{ + v128_t delta = wasm_i16x8_splat(((short)1 << (n-1))); + v128_t a1 = wasm_i16x8_shr(wasm_i16x8_add(a.val, delta), n); + v128_t b1 = wasm_i16x8_shr(wasm_i16x8_add(b.val, delta), n); + v128_t maxval = wasm_i16x8_splat(255); + v128_t minval = wasm_i16x8_splat(0); + v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i16x8_gt(a1, maxval)); + v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i16x8_gt(b1, maxval)); + v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval)); + v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i16x8_lt(b1, minval)); + return v_uint8x16(wasm_v8x16_shuffle(a3, b3, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); +} +template +inline v_uint16x8 v_rshr_pack_u(const v_int32x4& a, const v_int32x4& b) +{ + v128_t delta = wasm_i32x4_splat(((int)1 << (n-1))); + v128_t a1 = wasm_i32x4_shr(wasm_i32x4_add(a.val, delta), n); + v128_t b1 = wasm_i32x4_shr(wasm_i32x4_add(b.val, delta), n); + v128_t maxval = wasm_i32x4_splat(65535); + v128_t minval = wasm_i16x8_splat(0); + v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i32x4_gt(a1, maxval)); + v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i32x4_gt(b1, maxval)); + v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval)); + v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i32x4_lt(b1, minval)); + return v_uint16x8(wasm_v8x16_shuffle(a3, b3, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); +} + +inline void v_pack_store(uchar* ptr, const v_uint16x8& a) +{ + v128_t maxval = wasm_i16x8_splat(255); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u16x8_gt(a.val, maxval)); + v128_t r = wasm_v8x16_shuffle(a1, a1, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14); + uchar t_ptr[16]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<8; ++i) { + ptr[i] = t_ptr[i]; + } +} +inline void v_pack_store(schar* ptr, const v_int16x8& a) +{ + v128_t maxval = wasm_i16x8_splat(127); + v128_t minval = wasm_i16x8_splat(-128); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i16x8_gt(a.val, maxval)); + v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval)); + v128_t r = wasm_v8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14); + schar t_ptr[16]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<8; ++i) { + ptr[i] = t_ptr[i]; + } +} +inline void v_pack_store(ushort* ptr, const v_uint32x4& a) +{ + v128_t maxval = wasm_i32x4_splat(65535); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u32x4_gt(a.val, maxval)); + v128_t r = wasm_v8x16_shuffle(a1, a1, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13); + ushort t_ptr[8]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<4; ++i) { + ptr[i] = t_ptr[i]; + } +} +inline void v_pack_store(short* ptr, const v_int32x4& a) +{ + v128_t maxval = wasm_i32x4_splat(32767); + v128_t minval = wasm_i32x4_splat(-32768); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i32x4_gt(a.val, maxval)); + v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval)); + v128_t r = wasm_v8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13); + short t_ptr[8]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<4; ++i) { + ptr[i] = t_ptr[i]; + } +} +inline void v_pack_store(unsigned* ptr, const v_uint64x2& a) +{ + v128_t r = wasm_v8x16_shuffle(a.val, a.val, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11); + unsigned t_ptr[4]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<2; ++i) { + ptr[i] = t_ptr[i]; + } +} +inline void v_pack_store(int* ptr, const v_int64x2& a) +{ + v128_t r = wasm_v8x16_shuffle(a.val, a.val, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11); + int t_ptr[4]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<2; ++i) { + ptr[i] = t_ptr[i]; + } +} +inline void v_pack_u_store(uchar* ptr, const v_int16x8& a) +{ + v128_t maxval = wasm_i16x8_splat(255); + v128_t minval = wasm_i16x8_splat(0); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i16x8_gt(a.val, maxval)); + v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval)); + v128_t r = wasm_v8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14); + uchar t_ptr[16]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<8; ++i) { + ptr[i] = t_ptr[i]; + } +} +inline void v_pack_u_store(ushort* ptr, const v_int32x4& a) +{ + v128_t maxval = wasm_i32x4_splat(65535); + v128_t minval = wasm_i32x4_splat(0); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i32x4_gt(a.val, maxval)); + v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval)); + v128_t r = wasm_v8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13); + ushort t_ptr[8]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<4; ++i) { + ptr[i] = t_ptr[i]; + } +} + +template +inline void v_rshr_pack_store(uchar* ptr, const v_uint16x8& a) +{ + v128_t delta = wasm_i16x8_splat((short)(1 << (n-1))); + v128_t a1 = wasm_u16x8_shr(wasm_i16x8_add(a.val, delta), n); + v128_t maxval = wasm_i16x8_splat(255); + v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u16x8_gt(a1, maxval)); + v128_t r = wasm_v8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14); + uchar t_ptr[16]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<8; ++i) { + ptr[i] = t_ptr[i]; + } +} +template +inline void v_rshr_pack_store(schar* ptr, const v_int16x8& a) +{ + v128_t delta = wasm_i16x8_splat(((short)1 << (n-1))); + v128_t a1 = wasm_i16x8_shr(wasm_i16x8_add(a.val, delta), n); + v128_t maxval = wasm_i16x8_splat(127); + v128_t minval = wasm_i16x8_splat(-128); + v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i16x8_gt(a1, maxval)); + v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval)); + v128_t r = wasm_v8x16_shuffle(a3, a3, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14); + schar t_ptr[16]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<8; ++i) { + ptr[i] = t_ptr[i]; + } +} +template +inline void v_rshr_pack_store(ushort* ptr, const v_uint32x4& a) +{ + v128_t delta = wasm_i32x4_splat(((int)1 << (n-1))); + v128_t a1 = wasm_u32x4_shr(wasm_i32x4_add(a.val, delta), n); + v128_t maxval = wasm_i32x4_splat(65535); + v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u32x4_gt(a1, maxval)); + v128_t r = wasm_v8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13); + ushort t_ptr[8]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<4; ++i) { + ptr[i] = t_ptr[i]; + } +} +template +inline void v_rshr_pack_store(short* ptr, const v_int32x4& a) +{ + v128_t delta = wasm_i32x4_splat(((int)1 << (n-1))); + v128_t a1 = wasm_i32x4_shr(wasm_i32x4_add(a.val, delta), n); + v128_t maxval = wasm_i32x4_splat(32767); + v128_t minval = wasm_i32x4_splat(-32768); + v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i32x4_gt(a1, maxval)); + v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval)); + v128_t r = wasm_v8x16_shuffle(a3, a3, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13); + short t_ptr[8]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<4; ++i) { + ptr[i] = t_ptr[i]; + } +} +template +inline void v_rshr_pack_store(unsigned* ptr, const v_uint64x2& a) +{ +#ifdef __wasm_unimplemented_simd128__ + v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1))); + v128_t a1 = wasm_u64x2_shr(wasm_i64x2_add(a.val, delta), n); + v128_t r = wasm_v8x16_shuffle(a1, a1, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11); + unsigned t_ptr[4]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<2; ++i) { + ptr[i] = t_ptr[i]; + } +#else + fallback::v_uint64x2 _a(a); + fallback::v_rshr_pack_store(ptr, _a); +#endif +} +template +inline void v_rshr_pack_store(int* ptr, const v_int64x2& a) +{ +#ifdef __wasm_unimplemented_simd128__ + v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1))); + v128_t a1 = wasm_i64x2_shr(wasm_i64x2_add(a.val, delta), n); + v128_t r = wasm_v8x16_shuffle(a1, a1, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11); + int t_ptr[4]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<2; ++i) { + ptr[i] = t_ptr[i]; + } +#else + fallback::v_int64x2 _a(a); + fallback::v_rshr_pack_store(ptr, _a); +#endif +} +template +inline void v_rshr_pack_u_store(uchar* ptr, const v_int16x8& a) +{ + v128_t delta = wasm_i16x8_splat(((short)1 << (n-1))); + v128_t a1 = wasm_i16x8_shr(wasm_i16x8_add(a.val, delta), n); + v128_t maxval = wasm_i16x8_splat(255); + v128_t minval = wasm_i16x8_splat(0); + v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i16x8_gt(a1, maxval)); + v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval)); + v128_t r = wasm_v8x16_shuffle(a3, a3, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14); + uchar t_ptr[16]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<8; ++i) { + ptr[i] = t_ptr[i]; + } +} +template +inline void v_rshr_pack_u_store(ushort* ptr, const v_int32x4& a) +{ + v128_t delta = wasm_i32x4_splat(((int)1 << (n-1))); + v128_t a1 = wasm_i32x4_shr(wasm_i32x4_add(a.val, delta), n); + v128_t maxval = wasm_i32x4_splat(65535); + v128_t minval = wasm_i32x4_splat(0); + v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i32x4_gt(a1, maxval)); + v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval)); + v128_t r = wasm_v8x16_shuffle(a3, a3, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13); + ushort t_ptr[8]; + wasm_v128_store(t_ptr, r); + for (int i=0; i<4; ++i) { + ptr[i] = t_ptr[i]; + } +} + +inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) +{ + v128_t maxval = wasm_i16x8_splat(255); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u16x8_gt(a.val, maxval)); + v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u16x8_gt(b.val, maxval)); + return v_uint8x16(wasm_v8x16_shuffle(a1, b1, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); +} + +inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, const v_uint32x4& d) +{ + v128_t maxval = wasm_i32x4_splat(255); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u32x4_gt(a.val, maxval)); + v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u32x4_gt(b.val, maxval)); + v128_t c1 = wasm_v128_bitselect(maxval, c.val, wasm_u32x4_gt(c.val, maxval)); + v128_t d1 = wasm_v128_bitselect(maxval, d.val, wasm_u32x4_gt(d.val, maxval)); + v128_t ab = wasm_v8x16_shuffle(a1, b1, 0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28); + v128_t cd = wasm_v8x16_shuffle(c1, d1, 0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28); + return v_uint8x16(wasm_v8x16_shuffle(ab, cd, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23)); +} + +inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, + const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f, + const v_uint64x2& g, const v_uint64x2& h) +{ +#ifdef __wasm_unimplemented_simd128__ + v128_t maxval = wasm_i32x4_splat(255); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, ((__u64x2)(a.val) > (__u64x2)maxval)); + v128_t b1 = wasm_v128_bitselect(maxval, b.val, ((__u64x2)(b.val) > (__u64x2)maxval)); + v128_t c1 = wasm_v128_bitselect(maxval, c.val, ((__u64x2)(c.val) > (__u64x2)maxval)); + v128_t d1 = wasm_v128_bitselect(maxval, d.val, ((__u64x2)(d.val) > (__u64x2)maxval)); + v128_t e1 = wasm_v128_bitselect(maxval, e.val, ((__u64x2)(e.val) > (__u64x2)maxval)); + v128_t f1 = wasm_v128_bitselect(maxval, f.val, ((__u64x2)(f.val) > (__u64x2)maxval)); + v128_t g1 = wasm_v128_bitselect(maxval, g.val, ((__u64x2)(g.val) > (__u64x2)maxval)); + v128_t h1 = wasm_v128_bitselect(maxval, h.val, ((__u64x2)(h.val) > (__u64x2)maxval)); + v128_t ab = wasm_v8x16_shuffle(a1, b1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24); + v128_t cd = wasm_v8x16_shuffle(c1, d1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24); + v128_t ef = wasm_v8x16_shuffle(e1, f1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24); + v128_t gh = wasm_v8x16_shuffle(g1, h1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24); + v128_t abcd = wasm_v8x16_shuffle(ab, cd, 0,1,2,3,16,17,18,19,0,1,2,3,16,17,18,19); + v128_t efgh = wasm_v8x16_shuffle(ef, gh, 0,1,2,3,16,17,18,19,0,1,2,3,16,17,18,19); + return v_uint8x16(wasm_v8x16_shuffle(abcd, efgh, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23)); +#else + fallback::v_uint64x2 a_(a), b_(b), c_(c), d_(d), e_(e), f_(f), g_(g), h_(h); + return fallback::v_pack_b(a_, b_, c_, d_, e_, f_, g_, h_); +#endif +} + +inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& m3) +{ + v128_t v0 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 0)); + v128_t v1 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 1)); + v128_t v2 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 2)); + v128_t v3 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 3)); + v0 = wasm_f32x4_mul(v0, m0.val); + v1 = wasm_f32x4_mul(v1, m1.val); + v2 = wasm_f32x4_mul(v2, m2.val); + v3 = wasm_f32x4_mul(v3, m3.val); + + return v_float32x4(wasm_f32x4_add(wasm_f32x4_add(v0, v1), wasm_f32x4_add(v2, v3))); +} + +inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& a) +{ + v128_t v0 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 0)); + v128_t v1 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 1)); + v128_t v2 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 2)); + v0 = wasm_f32x4_mul(v0, m0.val); + v1 = wasm_f32x4_mul(v1, m1.val); + v2 = wasm_f32x4_mul(v2, m2.val); + + return v_float32x4(wasm_f32x4_add(wasm_f32x4_add(v0, v1), wasm_f32x4_add(v2, a.val))); +} + +#define OPENCV_HAL_IMPL_WASM_BIN_OP(bin_op, _Tpvec, intrin) \ +inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(intrin(a.val, b.val)); \ +} \ +inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \ +{ \ + a.val = intrin(a.val, b.val); \ + return a; \ +} + +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_uint8x16, wasm_u8x16_add_saturate) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_uint8x16, wasm_u8x16_sub_saturate) +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_int8x16, wasm_i8x16_add_saturate) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_int8x16, wasm_i8x16_sub_saturate) +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_uint16x8, wasm_u16x8_add_saturate) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_uint16x8, wasm_u16x8_sub_saturate) +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_int16x8, wasm_i16x8_add_saturate) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_int16x8, wasm_i16x8_sub_saturate) +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_uint32x4, wasm_i32x4_add) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_uint32x4, wasm_i32x4_sub) +OPENCV_HAL_IMPL_WASM_BIN_OP(*, v_uint32x4, wasm_i32x4_mul) +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_int32x4, wasm_i32x4_add) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_int32x4, wasm_i32x4_sub) +OPENCV_HAL_IMPL_WASM_BIN_OP(*, v_int32x4, wasm_i32x4_mul) +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_float32x4, wasm_f32x4_add) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_float32x4, wasm_f32x4_sub) +OPENCV_HAL_IMPL_WASM_BIN_OP(*, v_float32x4, wasm_f32x4_mul) +OPENCV_HAL_IMPL_WASM_BIN_OP(/, v_float32x4, wasm_f32x4_div) + +#ifdef __wasm_unimplemented_simd128__ +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_uint64x2, wasm_i64x2_add) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_uint64x2, wasm_i64x2_sub) +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_int64x2, wasm_i64x2_add) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_int64x2, wasm_i64x2_sub) +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_float64x2, wasm_f64x2_add) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_float64x2, wasm_f64x2_sub) +OPENCV_HAL_IMPL_WASM_BIN_OP(*, v_float64x2, wasm_f64x2_mul) +OPENCV_HAL_IMPL_WASM_BIN_OP(/, v_float64x2, wasm_f64x2_div) +#else +#define OPENCV_HAL_IMPL_FALLBACK_BIN_OP(bin_op, _Tpvec) \ +inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ +{ \ + fallback::_Tpvec a_(a), b_(b); \ + return _Tpvec((a_) bin_op (b_)); \ +} \ +inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \ +{ \ + fallback::_Tpvec a_(a), b_(b); \ + a_ bin_op##= b_; \ + a = _Tpvec(a_); \ + return a; \ +} + +OPENCV_HAL_IMPL_FALLBACK_BIN_OP(+, v_uint64x2) +OPENCV_HAL_IMPL_FALLBACK_BIN_OP(-, v_uint64x2) +OPENCV_HAL_IMPL_FALLBACK_BIN_OP(+, v_int64x2) +OPENCV_HAL_IMPL_FALLBACK_BIN_OP(-, v_int64x2) +OPENCV_HAL_IMPL_FALLBACK_BIN_OP(+, v_float64x2) +OPENCV_HAL_IMPL_FALLBACK_BIN_OP(-, v_float64x2) +OPENCV_HAL_IMPL_FALLBACK_BIN_OP(*, v_float64x2) +OPENCV_HAL_IMPL_FALLBACK_BIN_OP(/, v_float64x2) +#endif + +// saturating multiply 8-bit, 16-bit +#define OPENCV_HAL_IMPL_WASM_MUL_SAT(_Tpvec, _Tpwvec) \ +inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \ +{ \ + _Tpwvec c, d; \ + v_mul_expand(a, b, c, d); \ + return v_pack(c, d); \ +} \ +inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \ +{ a = a * b; return a; } + +OPENCV_HAL_IMPL_WASM_MUL_SAT(v_uint8x16, v_uint16x8) +OPENCV_HAL_IMPL_WASM_MUL_SAT(v_int8x16, v_int16x8) +OPENCV_HAL_IMPL_WASM_MUL_SAT(v_uint16x8, v_uint32x4) +OPENCV_HAL_IMPL_WASM_MUL_SAT(v_int16x8, v_int32x4) + +// Multiply and expand +inline void v_mul_expand(const v_uint8x16& a, const v_uint8x16& b, + v_uint16x8& c, v_uint16x8& d) +{ + v_uint16x8 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c = v_mul_wrap(a0, b0); + d = v_mul_wrap(a1, b1); +} + +inline void v_mul_expand(const v_int8x16& a, const v_int8x16& b, + v_int16x8& c, v_int16x8& d) +{ + v_int16x8 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c = v_mul_wrap(a0, b0); + d = v_mul_wrap(a1, b1); +} + +inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b, + v_int32x4& c, v_int32x4& d) +{ + v_int32x4 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c.val = wasm_i32x4_mul(a0.val, b0.val); + d.val = wasm_i32x4_mul(a1.val, b1.val); +} + +inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b, + v_uint32x4& c, v_uint32x4& d) +{ + v_uint32x4 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c.val = wasm_i32x4_mul(a0.val, b0.val); + d.val = wasm_i32x4_mul(a1.val, b1.val); +} + +inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b, + v_uint64x2& c, v_uint64x2& d) +{ +#ifdef __wasm_unimplemented_simd128__ + v_uint64x2 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c.val = ((__u64x2)(a0.val) * (__u64x2)(b0.val)); + d.val = ((__u64x2)(a1.val) * (__u64x2)(b1.val)); +#else + fallback::v_uint32x4 a_(a), b_(b); + fallback::v_uint64x2 c_, d_; + fallback::v_mul_expand(a_, b_, c_, d_); + c = v_uint64x2(c_); + d = v_uint64x2(d_); +#endif +} + +inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) +{ + v_int32x4 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + v128_t c = wasm_i32x4_mul(a0.val, b0.val); + v128_t d = wasm_i32x4_mul(a1.val, b1.val); + return v_int16x8(wasm_v8x16_shuffle(c, d, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31)); +} +inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) +{ + v_uint32x4 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + v128_t c = wasm_i32x4_mul(a0.val, b0.val); + v128_t d = wasm_i32x4_mul(a1.val, b1.val); + return v_uint16x8(wasm_v8x16_shuffle(c, d, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31)); +} + +//////// Dot Product //////// + +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b) +{ + v128_t a0 = wasm_i32x4_shr(wasm_i32x4_shl(a.val, 16), 16); + v128_t a1 = wasm_i32x4_shr(a.val, 16); + v128_t b0 = wasm_i32x4_shr(wasm_i32x4_shl(b.val, 16), 16); + v128_t b1 = wasm_i32x4_shr(b.val, 16); + v128_t c = wasm_i32x4_mul(a0, b0); + v128_t d = wasm_i32x4_mul(a1, b1); + return v_int32x4(wasm_i32x4_add(c, d)); +} + +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ return v_dotprod(a, b) + c; } + +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b) +{ +#ifdef __wasm_unimplemented_simd128__ + v128_t a0 = wasm_i64x2_shr(wasm_i64x2_shl(a.val, 32), 32); + v128_t a1 = wasm_i64x2_shr(a.val, 32); + v128_t b0 = wasm_i64x2_shr(wasm_i64x2_shl(b.val, 32), 32); + v128_t b1 = wasm_i64x2_shr(b.val, 32); + v128_t c = (v128_t)((__i64x2)a0 * (__i64x2)b0); + v128_t d = (v128_t)((__i64x2)a1 * (__i64x2)b1); + return v_int64x2(wasm_i64x2_add(c, d)); +#else + fallback::v_int32x4 a_(a); + fallback::v_int32x4 b_(b); + return fallback::v_dotprod(a_, b_); +#endif +} +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ +#ifdef __wasm_unimplemented_simd128__ + return v_dotprod(a, b) + c; +#else + fallback::v_int32x4 a_(a); + fallback::v_int32x4 b_(b); + fallback::v_int64x2 c_(c); + return fallback::v_dotprod(a_, b_, c_); +#endif +} + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b) +{ + v128_t a0 = wasm_u16x8_shr(wasm_i16x8_shl(a.val, 8), 8); + v128_t a1 = wasm_u16x8_shr(a.val, 8); + v128_t b0 = wasm_u16x8_shr(wasm_i16x8_shl(b.val, 8), 8); + v128_t b1 = wasm_u16x8_shr(b.val, 8); + return v_uint32x4(( + v_dotprod(v_int16x8(a0), v_int16x8(b0)) + + v_dotprod(v_int16x8(a1), v_int16x8(b1))).val + ); +} +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b) +{ + v128_t a0 = wasm_i16x8_shr(wasm_i16x8_shl(a.val, 8), 8); + v128_t a1 = wasm_i16x8_shr(a.val, 8); + v128_t b0 = wasm_i16x8_shr(wasm_i16x8_shl(b.val, 8), 8); + v128_t b1 = wasm_i16x8_shr(b.val, 8); + return v_int32x4( + v_dotprod(v_int16x8(a0), v_int16x8(b0)) + + v_dotprod(v_int16x8(a1), v_int16x8(b1)) + ); +} +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) +{ return v_dotprod_expand(a, b) + c; } + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b) +{ + fallback::v_uint16x8 a_(a); + fallback::v_uint16x8 b_(b); + return fallback::v_dotprod_expand(a_, b_); +} +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ + fallback::v_uint16x8 a_(a); + fallback::v_uint16x8 b_(b); + fallback::v_uint64x2 c_(c); + return fallback::v_dotprod_expand(a_, b_, c_); +} + +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b) +{ + fallback::v_int16x8 a_(a); + fallback::v_int16x8 b_(b); + return fallback::v_dotprod_expand(a_, b_); +} + +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ + fallback::v_int16x8 a_(a); + fallback::v_int16x8 b_(b); + fallback::v_int64x2 c_(c); + return fallback::v_dotprod_expand(a_, b_, c_); +} + +// 32 >> 64f +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b) +{ return v_cvt_f64(v_dotprod(a, b)); } +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +//////// Fast Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b) +{ return v_dotprod(a, b); } +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ return v_dotprod(a, b, c); } + +// 32 >> 64 +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_dotprod(a, b); } +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ return v_dotprod(a, b, c); } + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b) +{ return v_dotprod_expand(a, b); } +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ return v_dotprod_expand(a, b, c); } +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b) +{ return v_dotprod_expand(a, b); } +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) +{ return v_dotprod_expand(a, b, c); } + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b) +{ return v_dotprod_expand(a, b); } +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ return v_dotprod_expand(a, b, c); } +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b) +{ return v_dotprod_expand(a, b); } +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ return v_dotprod_expand(a, b, c); } + +// 32 >> 64f +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_dotprod_expand(a, b); } +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand(a, b, c); } + +#define OPENCV_HAL_IMPL_WASM_LOGIC_OP(_Tpvec) \ +OPENCV_HAL_IMPL_WASM_BIN_OP(&, _Tpvec, wasm_v128_and) \ +OPENCV_HAL_IMPL_WASM_BIN_OP(|, _Tpvec, wasm_v128_or) \ +OPENCV_HAL_IMPL_WASM_BIN_OP(^, _Tpvec, wasm_v128_xor) \ +inline _Tpvec operator ~ (const _Tpvec& a) \ +{ \ + return _Tpvec(wasm_v128_not(a.val)); \ +} + +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_uint8x16) +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_int8x16) +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_uint16x8) +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_int16x8) +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_uint32x4) +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_int32x4) +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_uint64x2) +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_int64x2) +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_float32x4) +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_float64x2) + +inline v_float32x4 v_sqrt(const v_float32x4& x) +{ +#ifdef __wasm_unimplemented_simd128__ + return v_float32x4(wasm_f32x4_sqrt(x.val)); +#else + fallback::v_float32x4 x_(x); + return fallback::v_sqrt(x_); +#endif +} + +inline v_float32x4 v_invsqrt(const v_float32x4& x) +{ +#ifdef __wasm_unimplemented_simd128__ + const v128_t _1_0 = wasm_f32x4_splat(1.0); + return v_float32x4(wasm_f32x4_div(_1_0, wasm_f32x4_sqrt(x.val))); +#else + fallback::v_float32x4 x_(x); + return fallback::v_invsqrt(x_); +#endif +} + +inline v_float64x2 v_sqrt(const v_float64x2& x) +{ +#ifdef __wasm_unimplemented_simd128__ + return v_float64x2(wasm_f64x2_sqrt(x.val)); +#else + fallback::v_float64x2 x_(x); + return fallback::v_sqrt(x_); +#endif +} + +inline v_float64x2 v_invsqrt(const v_float64x2& x) +{ +#ifdef __wasm_unimplemented_simd128__ + const v128_t _1_0 = wasm_f64x2_splat(1.0); + return v_float64x2(wasm_f64x2_div(_1_0, wasm_f64x2_sqrt(x.val))); +#else + fallback::v_float64x2 x_(x); + return fallback::v_invsqrt(x_); +#endif +} + +#define OPENCV_HAL_IMPL_WASM_ABS_INT_FUNC(_Tpuvec, _Tpsvec, suffix, zsuffix, shiftWidth) \ +inline _Tpuvec v_abs(const _Tpsvec& x) \ +{ \ + v128_t s = wasm_##suffix##_shr(x.val, shiftWidth); \ + v128_t f = wasm_##zsuffix##_shr(x.val, shiftWidth); \ + return _Tpuvec(wasm_##zsuffix##_add(wasm_v128_xor(x.val, f), s)); \ +} + +OPENCV_HAL_IMPL_WASM_ABS_INT_FUNC(v_uint8x16, v_int8x16, u8x16, i8x16, 7) +OPENCV_HAL_IMPL_WASM_ABS_INT_FUNC(v_uint16x8, v_int16x8, u16x8, i16x8, 15) +OPENCV_HAL_IMPL_WASM_ABS_INT_FUNC(v_uint32x4, v_int32x4, u32x4, i32x4, 31) + +inline v_float32x4 v_abs(const v_float32x4& x) +{ return v_float32x4(wasm_f32x4_abs(x.val)); } +inline v_float64x2 v_abs(const v_float64x2& x) +{ +#ifdef __wasm_unimplemented_simd128__ + return v_float64x2(wasm_f64x2_abs(x.val)); +#else + fallback::v_float64x2 x_(x); + return fallback::v_abs(x_); +#endif +} + +// TODO: exp, log, sin, cos + +#define OPENCV_HAL_IMPL_WASM_BIN_FUNC(_Tpvec, func, intrin) \ +inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(intrin(a.val, b.val)); \ +} + +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_float32x4, v_min, wasm_f32x4_min) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_float32x4, v_max, wasm_f32x4_max) + +#ifdef __wasm_unimplemented_simd128__ +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_float64x2, v_min, wasm_f64x2_min) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_float64x2, v_max, wasm_f64x2_max) +#else +#define OPENCV_HAL_IMPL_WASM_MINMAX_64f_FUNC(func) \ +inline v_float64x2 func(const v_float64x2& a, const v_float64x2& b) \ +{ \ + fallback::v_float64x2 a_(a), b_(b); \ + return fallback::func(a_, b_); \ +} + +OPENCV_HAL_IMPL_WASM_MINMAX_64f_FUNC(v_min) +OPENCV_HAL_IMPL_WASM_MINMAX_64f_FUNC(v_max) +#endif + +#define OPENCV_HAL_IMPL_WASM_MINMAX_S_INIT_FUNC(_Tpvec, suffix) \ +inline _Tpvec v_min(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(wasm_v128_bitselect(b.val, a.val, wasm_##suffix##_gt(a.val, b.val))); \ +} \ +inline _Tpvec v_max(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(wasm_v128_bitselect(a.val, b.val, wasm_##suffix##_gt(a.val, b.val))); \ +} + +OPENCV_HAL_IMPL_WASM_MINMAX_S_INIT_FUNC(v_int8x16, i8x16) +OPENCV_HAL_IMPL_WASM_MINMAX_S_INIT_FUNC(v_int16x8, i16x8) +OPENCV_HAL_IMPL_WASM_MINMAX_S_INIT_FUNC(v_int32x4, i32x4) + +#define OPENCV_HAL_IMPL_WASM_MINMAX_U_INIT_FUNC(_Tpvec, suffix, deltaNum) \ +inline _Tpvec v_min(const _Tpvec& a, const _Tpvec& b) \ +{ \ + v128_t delta = wasm_##suffix##_splat(deltaNum); \ + v128_t mask = wasm_##suffix##_gt(wasm_v128_xor(a.val, delta), wasm_v128_xor(b.val, delta)); \ + return _Tpvec(wasm_v128_bitselect(b.val, a.val, mask)); \ +} \ +inline _Tpvec v_max(const _Tpvec& a, const _Tpvec& b) \ +{ \ + v128_t delta = wasm_##suffix##_splat(deltaNum); \ + v128_t mask = wasm_##suffix##_gt(wasm_v128_xor(a.val, delta), wasm_v128_xor(b.val, delta)); \ + return _Tpvec(wasm_v128_bitselect(a.val, b.val, mask)); \ +} + +OPENCV_HAL_IMPL_WASM_MINMAX_U_INIT_FUNC(v_uint8x16, i8x16, (schar)0x80) +OPENCV_HAL_IMPL_WASM_MINMAX_U_INIT_FUNC(v_uint16x8, i16x8, (short)0x8000) +OPENCV_HAL_IMPL_WASM_MINMAX_U_INIT_FUNC(v_uint32x4, i32x4, (int)0x80000000) + +#define OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(_Tpvec, suffix, esuffix) \ +inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(wasm_##esuffix##_eq(a.val, b.val)); } \ +inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(wasm_##esuffix##_ne(a.val, b.val)); } \ +inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(wasm_##suffix##_lt(a.val, b.val)); } \ +inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(wasm_##suffix##_gt(a.val, b.val)); } \ +inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(wasm_##suffix##_le(a.val, b.val)); } \ +inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(wasm_##suffix##_ge(a.val, b.val)); } + +OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_uint8x16, u8x16, i8x16) +OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_int8x16, i8x16, i8x16) +OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_uint16x8, u16x8, i16x8) +OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_int16x8, i16x8, i16x8) +OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_uint32x4, u32x4, i32x4) +OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_int32x4, i32x4, i32x4) +OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_float32x4, f32x4, f32x4) + +#ifdef __wasm_unimplemented_simd128__ +OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_float64x2, f64x2, f64x2) +#else +#define OPENCV_HAL_IMPL_INIT_FALLBACK_CMP_OP(_Tpvec, bin_op) \ +inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ +{ \ + fallback::_Tpvec a_(a), b_(b); \ + return _Tpvec((a_) bin_op (b_));\ +} \ + +OPENCV_HAL_IMPL_INIT_FALLBACK_CMP_OP(v_float64x2, ==) +OPENCV_HAL_IMPL_INIT_FALLBACK_CMP_OP(v_float64x2, !=) +OPENCV_HAL_IMPL_INIT_FALLBACK_CMP_OP(v_float64x2, <) +OPENCV_HAL_IMPL_INIT_FALLBACK_CMP_OP(v_float64x2, >) +OPENCV_HAL_IMPL_INIT_FALLBACK_CMP_OP(v_float64x2, <=) +OPENCV_HAL_IMPL_INIT_FALLBACK_CMP_OP(v_float64x2, >=) +#endif + +#define OPENCV_HAL_IMPL_WASM_64BIT_CMP_OP(_Tpvec, cast) \ +inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ +{ return cast(v_reinterpret_as_f64(a) == v_reinterpret_as_f64(b)); } \ +inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ +{ return cast(v_reinterpret_as_f64(a) != v_reinterpret_as_f64(b)); } + +OPENCV_HAL_IMPL_WASM_64BIT_CMP_OP(v_uint64x2, v_reinterpret_as_u64) +OPENCV_HAL_IMPL_WASM_64BIT_CMP_OP(v_int64x2, v_reinterpret_as_s64) + +inline v_float32x4 v_not_nan(const v_float32x4& a) +{ + v128_t z = wasm_i32x4_splat(0x7fffffff); + v128_t t = wasm_i32x4_splat(0x7f800000); + return v_float32x4(wasm_u32x4_lt(wasm_v128_and(a.val, z), t)); +} +inline v_float64x2 v_not_nan(const v_float64x2& a) +{ +#ifdef __wasm_unimplemented_simd128__ + v128_t z = wasm_i64x2_splat(0x7fffffffffffffff); + v128_t t = wasm_i64x2_splat(0x7ff0000000000000); + return v_float64x2((__u64x2)(wasm_v128_and(a.val, z)) < (__u64x2)t); +#else + fallback::v_float64x2 a_(a); + return fallback::v_not_nan(a_); +#endif +} + +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint8x16, v_add_wrap, wasm_i8x16_add) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int8x16, v_add_wrap, wasm_i8x16_add) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint16x8, v_add_wrap, wasm_i16x8_add) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int16x8, v_add_wrap, wasm_i16x8_add) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint8x16, v_sub_wrap, wasm_i8x16_sub) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int8x16, v_sub_wrap, wasm_i8x16_sub) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint16x8, v_sub_wrap, wasm_i16x8_sub) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int16x8, v_sub_wrap, wasm_i16x8_sub) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint8x16, v_mul_wrap, wasm_i8x16_mul) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int8x16, v_mul_wrap, wasm_i8x16_mul) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint16x8, v_mul_wrap, wasm_i16x8_mul) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int16x8, v_mul_wrap, wasm_i16x8_mul) + + +/** Absolute difference **/ + +inline v_uint8x16 v_absdiff(const v_uint8x16& a, const v_uint8x16& b) +{ return v_add_wrap(a - b, b - a); } +inline v_uint16x8 v_absdiff(const v_uint16x8& a, const v_uint16x8& b) +{ return v_add_wrap(a - b, b - a); } +inline v_uint32x4 v_absdiff(const v_uint32x4& a, const v_uint32x4& b) +{ return v_max(a, b) - v_min(a, b); } + +inline v_uint8x16 v_absdiff(const v_int8x16& a, const v_int8x16& b) +{ + v_int8x16 d = v_sub_wrap(a, b); + v_int8x16 m = a < b; + return v_reinterpret_as_u8(v_sub_wrap(d ^ m, m)); +} +inline v_uint16x8 v_absdiff(const v_int16x8& a, const v_int16x8& b) +{ + return v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b))); +} +inline v_uint32x4 v_absdiff(const v_int32x4& a, const v_int32x4& b) +{ + v_int32x4 d = a - b; + v_int32x4 m = a < b; + return v_reinterpret_as_u32((d ^ m) - m); +} + +/** Saturating absolute difference **/ +inline v_int8x16 v_absdiffs(const v_int8x16& a, const v_int8x16& b) +{ + v_int8x16 d = a - b; + v_int8x16 m = a < b; + return (d ^ m) - m; + } +inline v_int16x8 v_absdiffs(const v_int16x8& a, const v_int16x8& b) +{ return v_max(a, b) - v_min(a, b); } + + +inline v_int32x4 v_fma(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + return a * b + c; +} + +inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + return v_fma(a, b, c); +} + +inline v_float32x4 v_fma(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) +{ + return a * b + c; +} + +inline v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) +{ + return a * b + c; +} + +inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b) +{ + v128_t absmask_vec = wasm_i32x4_splat(0x7fffffff); + return v_float32x4(wasm_v128_and(wasm_f32x4_sub(a.val, b.val), absmask_vec)); +} +inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b) +{ +#ifdef __wasm_unimplemented_simd128__ + v128_t absmask_vec = wasm_u64x2_shr(wasm_i32x4_splat(-1), 1); + return v_float64x2(wasm_v128_and(wasm_f64x2_sub(a.val, b.val), absmask_vec)); +#else + fallback::v_float64x2 a_(a), b_(b); + return fallback::v_absdiff(a_, b_); +#endif +} + +#define OPENCV_HAL_IMPL_WASM_MISC_FLT_OP(_Tpvec) \ +inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b) \ +{ \ + fallback::_Tpvec a_(a), b_(b); \ + return fallback::v_magnitude(a_, b_); \ +} \ +inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return v_fma(a, a, b*b); \ +} \ +inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \ +{ \ + return v_fma(a, b, c); \ +} + +OPENCV_HAL_IMPL_WASM_MISC_FLT_OP(v_float32x4) +OPENCV_HAL_IMPL_WASM_MISC_FLT_OP(v_float64x2) + +#define OPENCV_HAL_IMPL_WASM_SHIFT_OP(_Tpuvec, _Tpsvec, suffix, ssuffix) \ +inline _Tpuvec operator << (const _Tpuvec& a, int imm) \ +{ \ + return _Tpuvec(wasm_##suffix##_shl(a.val, imm)); \ +} \ +inline _Tpsvec operator << (const _Tpsvec& a, int imm) \ +{ \ + return _Tpsvec(wasm_##suffix##_shl(a.val, imm)); \ +} \ +inline _Tpuvec operator >> (const _Tpuvec& a, int imm) \ +{ \ + return _Tpuvec(wasm_##ssuffix##_shr(a.val, imm)); \ +} \ +inline _Tpsvec operator >> (const _Tpsvec& a, int imm) \ +{ \ + return _Tpsvec(wasm_##suffix##_shr(a.val, imm)); \ +} \ +template \ +inline _Tpuvec v_shl(const _Tpuvec& a) \ +{ \ + return _Tpuvec(wasm_##suffix##_shl(a.val, imm)); \ +} \ +template \ +inline _Tpsvec v_shl(const _Tpsvec& a) \ +{ \ + return _Tpsvec(wasm_##suffix##_shl(a.val, imm)); \ +} \ +template \ +inline _Tpuvec v_shr(const _Tpuvec& a) \ +{ \ + return _Tpuvec(wasm_##ssuffix##_shr(a.val, imm)); \ +} \ +template \ +inline _Tpsvec v_shr(const _Tpsvec& a) \ +{ \ + return _Tpsvec(wasm_##suffix##_shr(a.val, imm)); \ +} + +OPENCV_HAL_IMPL_WASM_SHIFT_OP(v_uint8x16, v_int8x16, i8x16, u8x16) +OPENCV_HAL_IMPL_WASM_SHIFT_OP(v_uint16x8, v_int16x8, i16x8, u16x8) +OPENCV_HAL_IMPL_WASM_SHIFT_OP(v_uint32x4, v_int32x4, i32x4, u32x4) + +#ifdef __wasm_unimplemented_simd128__ +OPENCV_HAL_IMPL_WASM_SHIFT_OP(v_uint64x2, v_int64x2, i64x2, u64x2) +#else +#define OPENCV_HAL_IMPL_FALLBACK_SHIFT_OP(_Tpvec) \ +inline _Tpvec operator << (const _Tpvec& a, int imm) \ +{ \ + fallback::_Tpvec a_(a); \ + return a_ << imm; \ +} \ +inline _Tpvec operator >> (const _Tpvec& a, int imm) \ +{ \ + fallback::_Tpvec a_(a); \ + return a_ >> imm; \ +} \ +template \ +inline _Tpvec v_shl(const _Tpvec& a) \ +{ \ + fallback::_Tpvec a_(a); \ + return fallback::v_shl(a_); \ +} \ +template \ +inline _Tpvec v_shr(const _Tpvec& a) \ +{ \ + fallback::_Tpvec a_(a); \ + return fallback::v_shr(a_); \ +} \ + +OPENCV_HAL_IMPL_FALLBACK_SHIFT_OP(v_uint64x2) +OPENCV_HAL_IMPL_FALLBACK_SHIFT_OP(v_int64x2) +#endif + +namespace hal_wasm_internal +{ + template 16)), + bool is_first = (imm == 0), + bool is_second = (imm == 16), + bool is_other = (((imm > 0) && (imm < 16)))> + class v_wasm_palignr_u8_class; + + template + class v_wasm_palignr_u8_class; + + template + class v_wasm_palignr_u8_class + { + public: + inline v128_t operator()(const v128_t& a, const v128_t&) const + { + return a; + } + }; + + template + class v_wasm_palignr_u8_class + { + public: + inline v128_t operator()(const v128_t&, const v128_t& b) const + { + return b; + } + }; + + template + class v_wasm_palignr_u8_class + { + public: + inline v128_t operator()(const v128_t& a, const v128_t& b) const + { + enum { imm2 = (sizeof(v128_t) - imm) }; + return wasm_v8x16_shuffle(a, b, + imm, imm+1, imm+2, imm+3, + imm+4, imm+5, imm+6, imm+7, + imm+8, imm+9, imm+10, imm+11, + imm+12, imm+13, imm+14, imm+15); + } + }; + + template + inline v128_t v_wasm_palignr_u8(const v128_t& a, const v128_t& b) + { + CV_StaticAssert((imm >= 0) && (imm <= 16), "Invalid imm for v_wasm_palignr_u8."); + return v_wasm_palignr_u8_class()(a, b); + } +} + +template +inline _Tpvec v_rotate_right(const _Tpvec &a) +{ + using namespace hal_wasm_internal; + enum { imm2 = (imm * sizeof(typename _Tpvec::lane_type)) }; + v128_t z = wasm_i8x16_splat(0); + return _Tpvec(v_wasm_palignr_u8(a.val, z)); +} + +template +inline _Tpvec v_rotate_left(const _Tpvec &a) +{ + using namespace hal_wasm_internal; + enum { imm2 = ((_Tpvec::nlanes - imm) * sizeof(typename _Tpvec::lane_type)) }; + v128_t z = wasm_i8x16_splat(0); + return _Tpvec(v_wasm_palignr_u8(z, a.val)); +} + +template +inline _Tpvec v_rotate_right(const _Tpvec &a, const _Tpvec &b) +{ + using namespace hal_wasm_internal; + enum { imm2 = (imm * sizeof(typename _Tpvec::lane_type)) }; + return _Tpvec(v_wasm_palignr_u8(a.val, b.val)); +} + +template +inline _Tpvec v_rotate_left(const _Tpvec &a, const _Tpvec &b) +{ + using namespace hal_wasm_internal; + enum { imm2 = ((_Tpvec::nlanes - imm) * sizeof(typename _Tpvec::lane_type)) }; + return _Tpvec(v_wasm_palignr_u8(b.val, a.val)); +} + +#define OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(_Tpvec, _Tp) \ +inline _Tpvec v_load(const _Tp* ptr) \ +{ return _Tpvec(wasm_v128_load(ptr)); } \ +inline _Tpvec v_load_aligned(const _Tp* ptr) \ +{ return _Tpvec(wasm_v128_load(ptr)); } \ +inline _Tpvec v_load_low(const _Tp* ptr) \ +{ \ + _Tp tmp[_Tpvec::nlanes] = {0}; \ + for (int i=0; i<_Tpvec::nlanes/2; ++i) { \ + tmp[i] = ptr[i]; \ + } \ + return _Tpvec(wasm_v128_load(tmp)); \ +} \ +inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ +{ \ + _Tp tmp[_Tpvec::nlanes]; \ + for (int i=0; i<_Tpvec::nlanes/2; ++i) { \ + tmp[i] = ptr0[i]; \ + tmp[i+_Tpvec::nlanes/2] = ptr1[i]; \ + } \ + return _Tpvec(wasm_v128_load(tmp)); \ +} \ +inline void v_store(_Tp* ptr, const _Tpvec& a) \ +{ wasm_v128_store(ptr, a.val); } \ +inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ +{ wasm_v128_store(ptr, a.val); } \ +inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ +{ wasm_v128_store(ptr, a.val); } \ +inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) \ +{ \ + wasm_v128_store(ptr, a.val); \ +} \ +inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ +{ \ + fallback::_Tpvec a_(a); \ + fallback::v_store_low(ptr, a_); \ +} \ +inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ +{ \ + fallback::_Tpvec a_(a); \ + fallback::v_store_high(ptr, a_); \ +} + +OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_uint8x16, uchar) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_int8x16, schar) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_uint16x8, ushort) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_int16x8, short) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_uint32x4, unsigned) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_int32x4, int) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_uint64x2, uint64) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_int64x2, int64) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_float32x4, float) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_float64x2, double) + + +/** Reverse **/ +inline v_uint8x16 v_reverse(const v_uint8x16 &a) +{ return v_uint8x16(wasm_v8x16_shuffle(a.val, a.val, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); } + +inline v_int8x16 v_reverse(const v_int8x16 &a) +{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); } + +inline v_uint16x8 v_reverse(const v_uint16x8 &a) +{ return v_uint16x8(wasm_v8x16_shuffle(a.val, a.val, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1)); } + +inline v_int16x8 v_reverse(const v_int16x8 &a) +{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } + +inline v_uint32x4 v_reverse(const v_uint32x4 &a) +{ return v_uint32x4(wasm_v8x16_shuffle(a.val, a.val, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)); } + +inline v_int32x4 v_reverse(const v_int32x4 &a) +{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_float32x4 v_reverse(const v_float32x4 &a) +{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_uint64x2 v_reverse(const v_uint64x2 &a) +{ return v_uint64x2(wasm_v8x16_shuffle(a.val, a.val, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)); } + +inline v_int64x2 v_reverse(const v_int64x2 &a) +{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } + +inline v_float64x2 v_reverse(const v_float64x2 &a) +{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } + + +#define OPENCV_HAL_IMPL_WASM_REDUCE_OP_4_SUM(_Tpvec, scalartype, regtype, suffix, esuffix) \ +inline scalartype v_reduce_sum(const _Tpvec& a) \ +{ \ + regtype val = a.val; \ + val = wasm_##suffix##_add(val, wasm_v8x16_shuffle(val, val, 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); \ + val = wasm_##suffix##_add(val, wasm_v8x16_shuffle(val, val, 4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3)); \ + return (scalartype)wasm_##esuffix##_extract_lane(val, 0); \ +} + +OPENCV_HAL_IMPL_WASM_REDUCE_OP_4_SUM(v_uint32x4, unsigned, v128_t, i32x4, i32x4) +OPENCV_HAL_IMPL_WASM_REDUCE_OP_4_SUM(v_int32x4, int, v128_t, i32x4, i32x4) +OPENCV_HAL_IMPL_WASM_REDUCE_OP_4_SUM(v_float32x4, float, v128_t, f32x4, f32x4) + +// To do: Optimize v_reduce_sum with wasm intrin. +// Now use fallback implementation as there is no widening op in wasm intrin. + +#define OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(_Tpvec, scalartype) \ +inline scalartype v_reduce_sum(const _Tpvec& a) \ +{ \ + fallback::_Tpvec a_(a); \ + return fallback::v_reduce_sum(a_); \ +} + +OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_uint8x16, unsigned) +OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_int8x16, int) +OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_uint16x8, unsigned) +OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_int16x8, int) +OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_uint64x2, uint64) +OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_int64x2, int64) +OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_float64x2, double) + +inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d) +{ + v128_t ac = wasm_f32x4_add(wasm_unpacklo_i32x4(a.val, c.val), wasm_unpackhi_i32x4(a.val, c.val)); + v128_t bd = wasm_f32x4_add(wasm_unpacklo_i32x4(b.val, d.val), wasm_unpackhi_i32x4(b.val, d.val)); + return v_float32x4(wasm_f32x4_add(wasm_unpacklo_i32x4(ac, bd), wasm_unpackhi_i32x4(ac, bd))); +} + +#define OPENCV_HAL_IMPL_WASM_REDUCE_OP(_Tpvec, scalartype, func, scalar_func) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + scalartype buf[_Tpvec::nlanes]; \ + v_store(buf, a); \ + scalartype tmp = buf[0]; \ + for (int i=1; i<_Tpvec::nlanes; ++i) { \ + tmp = scalar_func(tmp, buf[i]); \ + } \ + return tmp; \ +} + +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_uint8x16, uchar, max, std::max) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_uint8x16, uchar, min, std::min) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_int8x16, schar, max, std::max) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_int8x16, schar, min, std::min) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_uint16x8, ushort, max, std::max) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_uint16x8, ushort, min, std::min) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_int16x8, short, max, std::max) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_int16x8, short, min, std::min) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_uint32x4, unsigned, max, std::max) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_uint32x4, unsigned, min, std::min) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_int32x4, int, max, std::max) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_int32x4, int, min, std::min) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_float32x4, float, max, std::max) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_float32x4, float, min, std::min) + +inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b) +{ + v_uint16x8 l16, h16; + v_uint32x4 l16_l32, l16_h32, h16_l32, h16_h32; + v_expand(v_absdiff(a, b), l16, h16); + v_expand(l16, l16_l32, l16_h32); + v_expand(h16, h16_l32, h16_h32); + return v_reduce_sum(l16_l32+l16_h32+h16_l32+h16_h32); +} +inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b) +{ + v_uint16x8 l16, h16; + v_uint32x4 l16_l32, l16_h32, h16_l32, h16_h32; + v_expand(v_absdiff(a, b), l16, h16); + v_expand(l16, l16_l32, l16_h32); + v_expand(h16, h16_l32, h16_h32); + return v_reduce_sum(l16_l32+l16_h32+h16_l32+h16_h32); +} +inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b) +{ + v_uint32x4 l, h; + v_expand(v_absdiff(a, b), l, h); + return v_reduce_sum(l + h); +} +inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b) +{ + v_uint32x4 l, h; + v_expand(v_absdiff(a, b), l, h); + return v_reduce_sum(l + h); +} +inline unsigned v_reduce_sad(const v_uint32x4& a, const v_uint32x4& b) +{ + return v_reduce_sum(v_absdiff(a, b)); +} +inline unsigned v_reduce_sad(const v_int32x4& a, const v_int32x4& b) +{ + return v_reduce_sum(v_absdiff(a, b)); +} +inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b) +{ + return v_reduce_sum(v_absdiff(a, b)); +} + +inline v_uint8x16 v_popcount(const v_uint8x16& a) +{ + v128_t m1 = wasm_i32x4_splat(0x55555555); + v128_t m2 = wasm_i32x4_splat(0x33333333); + v128_t m4 = wasm_i32x4_splat(0x0f0f0f0f); + v128_t p = a.val; + p = wasm_i32x4_add(wasm_v128_and(wasm_u32x4_shr(p, 1), m1), wasm_v128_and(p, m1)); + p = wasm_i32x4_add(wasm_v128_and(wasm_u32x4_shr(p, 2), m2), wasm_v128_and(p, m2)); + p = wasm_i32x4_add(wasm_v128_and(wasm_u32x4_shr(p, 4), m4), wasm_v128_and(p, m4)); + return v_uint8x16(p); +} +inline v_uint16x8 v_popcount(const v_uint16x8& a) +{ + v_uint8x16 p = v_popcount(v_reinterpret_as_u8(a)); + p += v_rotate_right<1>(p); + return v_reinterpret_as_u16(p) & v_setall_u16(0x00ff); +} +inline v_uint32x4 v_popcount(const v_uint32x4& a) +{ + v_uint8x16 p = v_popcount(v_reinterpret_as_u8(a)); + p += v_rotate_right<1>(p); + p += v_rotate_right<2>(p); + return v_reinterpret_as_u32(p) & v_setall_u32(0x000000ff); +} +inline v_uint64x2 v_popcount(const v_uint64x2& a) +{ + fallback::v_uint64x2 a_(a); + return fallback::v_popcount(a_); +} +inline v_uint8x16 v_popcount(const v_int8x16& a) +{ return v_popcount(v_reinterpret_as_u8(a)); } +inline v_uint16x8 v_popcount(const v_int16x8& a) +{ return v_popcount(v_reinterpret_as_u16(a)); } +inline v_uint32x4 v_popcount(const v_int32x4& a) +{ return v_popcount(v_reinterpret_as_u32(a)); } +inline v_uint64x2 v_popcount(const v_int64x2& a) +{ return v_popcount(v_reinterpret_as_u64(a)); } + +#define OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(_Tpvec, suffix, scalarType) \ +inline int v_signmask(const _Tpvec& a) \ +{ \ + fallback::_Tpvec a_(a); \ + return fallback::v_signmask(a_); \ +} \ +inline bool v_check_all(const _Tpvec& a) \ +{ return wasm_i8x16_all_true(wasm_##suffix##_lt(a.val, wasm_##suffix##_splat(0))); } \ +inline bool v_check_any(const _Tpvec& a) \ +{ return wasm_i8x16_any_true(wasm_##suffix##_lt(a.val, wasm_##suffix##_splat(0)));; } + +OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_uint8x16, i8x16, schar) +OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_int8x16, i8x16, schar) +OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_uint16x8, i16x8, short) +OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_int16x8, i16x8, short) +OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_uint32x4, i32x4, int) +OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_int32x4, i32x4, int) +OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_float32x4, i32x4, float) + +inline int v_signmask(const v_float64x2& a) +{ + fallback::v_float64x2 a_(a); + return fallback::v_signmask(a_); +} +inline bool v_check_all(const v_float64x2& a) +{ +#ifdef __wasm_unimplemented_simd128__ + return wasm_i8x16_all_true((__i64x2)(a.val) < (__i64x2)(wasm_i64x2_splat(0))); +#else + fallback::v_float64x2 a_(a); + return fallback::v_check_all(a_); +#endif +} +inline bool v_check_any(const v_float64x2& a) +{ +#ifdef __wasm_unimplemented_simd128__ + return wasm_i8x16_any_true((__i64x2)(a.val) < (__i64x2)(wasm_i64x2_splat(0)));; +#else + fallback::v_float64x2 a_(a); + return fallback::v_check_any(a_); +#endif +} + +inline int v_scan_forward(const v_int8x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); } +inline int v_scan_forward(const v_uint8x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); } +inline int v_scan_forward(const v_int16x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; } +inline int v_scan_forward(const v_uint16x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; } +inline int v_scan_forward(const v_int32x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; } +inline int v_scan_forward(const v_uint32x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; } +inline int v_scan_forward(const v_float32x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; } +inline int v_scan_forward(const v_int64x2& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } +inline int v_scan_forward(const v_uint64x2& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } +inline int v_scan_forward(const v_float64x2& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } + +#define OPENCV_HAL_IMPL_WASM_SELECT(_Tpvec) \ +inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(wasm_v128_bitselect(a.val, b.val, mask.val)); \ +} + +OPENCV_HAL_IMPL_WASM_SELECT(v_uint8x16) +OPENCV_HAL_IMPL_WASM_SELECT(v_int8x16) +OPENCV_HAL_IMPL_WASM_SELECT(v_uint16x8) +OPENCV_HAL_IMPL_WASM_SELECT(v_int16x8) +OPENCV_HAL_IMPL_WASM_SELECT(v_uint32x4) +OPENCV_HAL_IMPL_WASM_SELECT(v_int32x4) +// OPENCV_HAL_IMPL_WASM_SELECT(v_uint64x2) +// OPENCV_HAL_IMPL_WASM_SELECT(v_int64x2) +OPENCV_HAL_IMPL_WASM_SELECT(v_float32x4) +OPENCV_HAL_IMPL_WASM_SELECT(v_float64x2) + +#define OPENCV_HAL_IMPL_WASM_EXPAND(_Tpvec, _Tpwvec, _Tp, intrin) \ +inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ +{ \ + b0.val = intrin(a.val); \ + b1.val = __CV_CAT(intrin, _high)(a.val); \ +} \ +inline _Tpwvec v_expand_low(const _Tpvec& a) \ +{ return _Tpwvec(intrin(a.val)); } \ +inline _Tpwvec v_expand_high(const _Tpvec& a) \ +{ return _Tpwvec(__CV_CAT(intrin, _high)(a.val)); } \ +inline _Tpwvec v_load_expand(const _Tp* ptr) \ +{ \ + v128_t a = wasm_v128_load(ptr); \ + return _Tpwvec(intrin(a)); \ +} + +OPENCV_HAL_IMPL_WASM_EXPAND(v_uint8x16, v_uint16x8, uchar, v128_cvtu8x16_i16x8) +OPENCV_HAL_IMPL_WASM_EXPAND(v_int8x16, v_int16x8, schar, v128_cvti8x16_i16x8) +OPENCV_HAL_IMPL_WASM_EXPAND(v_uint16x8, v_uint32x4, ushort, v128_cvtu16x8_i32x4) +OPENCV_HAL_IMPL_WASM_EXPAND(v_int16x8, v_int32x4, short, v128_cvti16x8_i32x4) +OPENCV_HAL_IMPL_WASM_EXPAND(v_uint32x4, v_uint64x2, unsigned, v128_cvtu32x4_i64x2) +OPENCV_HAL_IMPL_WASM_EXPAND(v_int32x4, v_int64x2, int, v128_cvti32x4_i64x2) + +#define OPENCV_HAL_IMPL_WASM_EXPAND_Q(_Tpvec, _Tp, intrin) \ +inline _Tpvec v_load_expand_q(const _Tp* ptr) \ +{ \ + v128_t a = wasm_v128_load(ptr); \ + return _Tpvec(intrin(a)); \ +} + +OPENCV_HAL_IMPL_WASM_EXPAND_Q(v_uint32x4, uchar, v128_cvtu8x16_i32x4) +OPENCV_HAL_IMPL_WASM_EXPAND_Q(v_int32x4, schar, v128_cvti8x16_i32x4) + +#define OPENCV_HAL_IMPL_WASM_UNPACKS(_Tpvec, suffix) \ +inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1) \ +{ \ + b0.val = wasm_unpacklo_##suffix(a0.val, a1.val); \ + b1.val = wasm_unpackhi_##suffix(a0.val, a1.val); \ +} \ +inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(wasm_unpacklo_i64x2(a.val, b.val)); \ +} \ +inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(wasm_unpackhi_i64x2(a.val, b.val)); \ +} \ +inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d) \ +{ \ + c.val = wasm_unpacklo_i64x2(a.val, b.val); \ + d.val = wasm_unpackhi_i64x2(a.val, b.val); \ +} + +OPENCV_HAL_IMPL_WASM_UNPACKS(v_uint8x16, i8x16) +OPENCV_HAL_IMPL_WASM_UNPACKS(v_int8x16, i8x16) +OPENCV_HAL_IMPL_WASM_UNPACKS(v_uint16x8, i16x8) +OPENCV_HAL_IMPL_WASM_UNPACKS(v_int16x8, i16x8) +OPENCV_HAL_IMPL_WASM_UNPACKS(v_uint32x4, i32x4) +OPENCV_HAL_IMPL_WASM_UNPACKS(v_int32x4, i32x4) +OPENCV_HAL_IMPL_WASM_UNPACKS(v_float32x4, i32x4) +OPENCV_HAL_IMPL_WASM_UNPACKS(v_float64x2, i64x2) + +template +inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) +{ + return v_rotate_right(a, b); +} + +inline v_int32x4 v_round(const v_float32x4& a) +{ + v128_t h = wasm_f32x4_splat(0.5); + return v_int32x4(wasm_i32x4_trunc_saturate_f32x4(wasm_f32x4_add(a.val, h))); +} + +inline v_int32x4 v_floor(const v_float32x4& a) +{ + v128_t a1 = wasm_i32x4_trunc_saturate_f32x4(a.val); + v128_t mask = wasm_f32x4_lt(a.val, wasm_f32x4_convert_i32x4(a1)); + return v_int32x4(wasm_i32x4_add(a1, mask)); +} + +inline v_int32x4 v_ceil(const v_float32x4& a) +{ + v128_t a1 = wasm_i32x4_trunc_saturate_f32x4(a.val); + v128_t mask = wasm_f32x4_gt(a.val, wasm_f32x4_convert_i32x4(a1)); + return v_int32x4(wasm_i32x4_sub(a1, mask)); +} + +inline v_int32x4 v_trunc(const v_float32x4& a) +{ return v_int32x4(wasm_i32x4_trunc_saturate_f32x4(a.val)); } + +#define OPENCV_HAL_IMPL_WASM_MATH_FUNC(func, cfunc, _Tpvec, _Tpnvec, _Tp, _Tpn) \ +inline _Tpnvec func(const _Tpvec& a) \ +{ \ + fallback::_Tpvec a_(a); \ + return fallback::func(a_); \ +} + +OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_round, cvRound, v_float64x2, v_int32x4, double, int) +OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_floor, cvFloor, v_float64x2, v_int32x4, double, int) +OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_ceil, cvCeil, v_float64x2, v_int32x4, double, int) +OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_trunc, int, v_float64x2, v_int32x4, double, int) + +inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) +{ + fallback::v_float64x2 a_(a), b_(b); + return fallback::v_round(a_, b_); +} + +#define OPENCV_HAL_IMPL_WASM_TRANSPOSE4x4(_Tpvec, suffix) \ +inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, \ + const _Tpvec& a2, const _Tpvec& a3, \ + _Tpvec& b0, _Tpvec& b1, \ + _Tpvec& b2, _Tpvec& b3) \ +{ \ + v128_t t0 = wasm_unpacklo_##suffix(a0.val, a1.val); \ + v128_t t1 = wasm_unpacklo_##suffix(a2.val, a3.val); \ + v128_t t2 = wasm_unpackhi_##suffix(a0.val, a1.val); \ + v128_t t3 = wasm_unpackhi_##suffix(a2.val, a3.val); \ +\ + b0.val = wasm_unpacklo_i64x2(t0, t1); \ + b1.val = wasm_unpackhi_i64x2(t0, t1); \ + b2.val = wasm_unpacklo_i64x2(t2, t3); \ + b3.val = wasm_unpackhi_i64x2(t2, t3); \ +} + +OPENCV_HAL_IMPL_WASM_TRANSPOSE4x4(v_uint32x4, i32x4) +OPENCV_HAL_IMPL_WASM_TRANSPOSE4x4(v_int32x4, i32x4) +OPENCV_HAL_IMPL_WASM_TRANSPOSE4x4(v_float32x4, i32x4) + +// load deinterleave +inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b) +{ + v128_t t00 = wasm_v128_load(ptr); + v128_t t01 = wasm_v128_load(ptr + 16); + + a.val = wasm_v8x16_shuffle(t00, t01, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30); + b.val = wasm_v8x16_shuffle(t00, t01, 1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31); +} + +inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c) +{ + v128_t t00 = wasm_v128_load(ptr); + v128_t t01 = wasm_v128_load(ptr + 16); + v128_t t02 = wasm_v128_load(ptr + 32); + + v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,3,6,9,12,15,18,21,24,27,30,1,2,4,5,7); + v128_t t11 = wasm_v8x16_shuffle(t00, t01, 1,4,7,10,13,16,19,22,25,28,31,0,2,3,5,6); + v128_t t12 = wasm_v8x16_shuffle(t00, t01, 2,5,8,11,14,17,20,23,26,29,0,1,3,4,6,7); + + a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,17,20,23,26,29); + b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,18,21,24,27,30); + c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,8,9,16,19,22,25,28,31); +} + +inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c, v_uint8x16& d) +{ + v128_t u0 = wasm_v128_load(ptr); // a0 b0 c0 d0 a1 b1 c1 d1 ... + v128_t u1 = wasm_v128_load(ptr + 16); // a4 b4 c4 d4 ... + v128_t u2 = wasm_v128_load(ptr + 32); // a8 b8 c8 d8 ... + v128_t u3 = wasm_v128_load(ptr + 48); // a12 b12 c12 d12 ... + + v128_t v0 = wasm_v8x16_shuffle(u0, u1, 0,4,8,12,16,20,24,28,1,5,9,13,17,21,25,29); + v128_t v1 = wasm_v8x16_shuffle(u2, u3, 0,4,8,12,16,20,24,28,1,5,9,13,17,21,25,29); + v128_t v2 = wasm_v8x16_shuffle(u0, u1, 2,6,10,14,18,22,26,30,3,7,11,15,19,23,27,31); + v128_t v3 = wasm_v8x16_shuffle(u2, u3, 2,6,10,14,18,22,26,30,3,7,11,15,19,23,27,31); + + a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23); + b.val = wasm_v8x16_shuffle(v0, v1, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31); + c.val = wasm_v8x16_shuffle(v2, v3, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23); + d.val = wasm_v8x16_shuffle(v2, v3, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31); +} + +inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b) +{ + v128_t v0 = wasm_v128_load(ptr); // a0 b0 a1 b1 a2 b2 a3 b3 + v128_t v1 = wasm_v128_load(ptr + 8); // a4 b4 a5 b5 a6 b6 a7 b7 + + a.val = wasm_v8x16_shuffle(v0, v1, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29); // a0 a1 a2 a3 a4 a5 a6 a7 + b.val = wasm_v8x16_shuffle(v0, v1, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31); // b0 b1 ab b3 b4 b5 b6 b7 +} + +inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c) +{ + v128_t t00 = wasm_v128_load(ptr); // a0 b0 c0 a1 b1 c1 a2 b2 + v128_t t01 = wasm_v128_load(ptr + 8); // c2 a3 b3 c3 a4 b4 c4 a5 + v128_t t02 = wasm_v128_load(ptr + 16); // b5 c5 a6 b6 c6 a7 b7 c7 + + v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,1,6,7,12,13,18,19,24,25,30,31,2,3,4,5); + v128_t t11 = wasm_v8x16_shuffle(t00, t01, 2,3,8,9,14,15,20,21,26,27,0,1,4,5,6,7); + v128_t t12 = wasm_v8x16_shuffle(t00, t01, 4,5,10,11,16,17,22,23,28,29,0,1,2,3,6,7); + + a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,26,27); + b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,16,17,22,23,28,29); + c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,8,9,18,19,24,25,30,31); +} + +inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c, v_uint16x8& d) +{ + v128_t u0 = wasm_v128_load(ptr); // a0 b0 c0 d0 a1 b1 c1 d1 + v128_t u1 = wasm_v128_load(ptr + 8); // a2 b2 c2 d2 ... + v128_t u2 = wasm_v128_load(ptr + 16); // a4 b4 c4 d4 ... + v128_t u3 = wasm_v128_load(ptr + 24); // a6 b6 c6 d6 ... + + v128_t v0 = wasm_v8x16_shuffle(u0, u1, 0,1,8,9,16,17,24,25,2,3,10,11,18,19,26,27); // a0 a1 a2 a3 b0 b1 b2 b3 + v128_t v1 = wasm_v8x16_shuffle(u2, u3, 0,1,8,9,16,17,24,25,2,3,10,11,18,19,26,27); // a4 a5 a6 a7 b4 b5 b6 b7 + v128_t v2 = wasm_v8x16_shuffle(u0, u1, 4,5,12,13,20,21,28,29,6,7,14,15,22,23,30,31); // c0 c1 c2 c3 d0 d1 d2 d3 + v128_t v3 = wasm_v8x16_shuffle(u2, u3, 4,5,12,13,20,21,28,29,6,7,14,15,22,23,30,31); // c4 c5 c6 c7 d4 d5 d6 d7 + + a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23); + b.val = wasm_v8x16_shuffle(v0, v1, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31); + c.val = wasm_v8x16_shuffle(v2, v3, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23); + d.val = wasm_v8x16_shuffle(v2, v3, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31); +} + +inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b) +{ + v128_t v0 = wasm_v128_load(ptr); // a0 b0 a1 b1 + v128_t v1 = wasm_v128_load(ptr + 4); // a2 b2 a3 b3 + + a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); // a0 a1 a2 a3 + b.val = wasm_v8x16_shuffle(v0, v1, 4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); // b0 b1 b2 b3 +} + +inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c) +{ + v128_t t00 = wasm_v128_load(ptr); // a0 b0 c0 a1 + v128_t t01 = wasm_v128_load(ptr + 4); // b2 c2 a3 b3 + v128_t t02 = wasm_v128_load(ptr + 8); // c3 a4 b4 c4 + + v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,1,2,3,12,13,14,15,24,25,26,27,4,5,6,7); + v128_t t11 = wasm_v8x16_shuffle(t00, t01, 4,5,6,7,16,17,18,19,28,29,30,31,0,1,2,3); + v128_t t12 = wasm_v8x16_shuffle(t00, t01, 8,9,10,11,20,21,22,23,0,1,2,3,4,5,6,7); + + a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,22,23); + b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,11,24,25,26,27); + c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,16,17,18,19,28,29,30,31); +} + +inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c, v_uint32x4& d) +{ + v_uint32x4 s0(wasm_v128_load(ptr)); // a0 b0 c0 d0 + v_uint32x4 s1(wasm_v128_load(ptr + 4)); // a1 b1 c1 d1 + v_uint32x4 s2(wasm_v128_load(ptr + 8)); // a2 b2 c2 d2 + v_uint32x4 s3(wasm_v128_load(ptr + 12)); // a3 b3 c3 d3 + + v_transpose4x4(s0, s1, s2, s3, a, b, c, d); +} + +inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b) +{ + v128_t v0 = wasm_v128_load(ptr); // a0 b0 a1 b1 + v128_t v1 = wasm_v128_load((ptr + 4)); // a2 b2 a3 b3 + + a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); // a0 a1 a2 a3 + b.val = wasm_v8x16_shuffle(v0, v1, 4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); // b0 b1 b2 b3 +} + +inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c) +{ + v128_t t00 = wasm_v128_load(ptr); // a0 b0 c0 a1 + v128_t t01 = wasm_v128_load(ptr + 4); // b2 c2 a3 b3 + v128_t t02 = wasm_v128_load(ptr + 8); // c3 a4 b4 c4 + + v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,1,2,3,12,13,14,15,24,25,26,27,4,5,6,7); + v128_t t11 = wasm_v8x16_shuffle(t00, t01, 4,5,6,7,16,17,18,19,28,29,30,31,0,1,2,3); + v128_t t12 = wasm_v8x16_shuffle(t00, t01, 8,9,10,11,20,21,22,23,0,1,2,3,4,5,6,7); + + a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,22,23); + b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,11,24,25,26,27); + c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,16,17,18,19,28,29,30,31); +} + +inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c, v_float32x4& d) +{ + v_float32x4 s0(wasm_v128_load(ptr)); // a0 b0 c0 d0 + v_float32x4 s1(wasm_v128_load(ptr + 4)); // a1 b1 c1 d1 + v_float32x4 s2(wasm_v128_load(ptr + 8)); // a2 b2 c2 d2 + v_float32x4 s3(wasm_v128_load(ptr + 12)); // a3 b3 c3 d3 + + v_transpose4x4(s0, s1, s2, s3, a, b, c, d); +} + +inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b) +{ + v128_t t0 = wasm_v128_load(ptr); // a0 b0 + v128_t t1 = wasm_v128_load(ptr + 2); // a1 b1 + + a.val = wasm_unpacklo_i64x2(t0, t1); + b.val = wasm_unpackhi_i64x2(t0, t1); +} + +inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b, v_uint64x2& c) +{ + v128_t t0 = wasm_v128_load(ptr); // a0, b0 + v128_t t1 = wasm_v128_load(ptr + 2); // c0, a1 + v128_t t2 = wasm_v128_load(ptr + 4); // b1, c1 + + a.val = wasm_v8x16_shuffle(t0, t1, 0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31); + b.val = wasm_v8x16_shuffle(t0, t2, 8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23); + c.val = wasm_v8x16_shuffle(t1, t2, 0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31); +} + +inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, + v_uint64x2& b, v_uint64x2& c, v_uint64x2& d) +{ + v128_t t0 = wasm_v128_load(ptr); // a0 b0 + v128_t t1 = wasm_v128_load(ptr + 2); // c0 d0 + v128_t t2 = wasm_v128_load(ptr + 4); // a1 b1 + v128_t t3 = wasm_v128_load(ptr + 6); // c1 d1 + + a.val = wasm_unpacklo_i64x2(t0, t2); + b.val = wasm_unpackhi_i64x2(t0, t2); + c.val = wasm_unpacklo_i64x2(t1, t3); + d.val = wasm_unpackhi_i64x2(t1, t3); +} + +// store interleave + +inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t v0 = wasm_unpacklo_i8x16(a.val, b.val); + v128_t v1 = wasm_unpackhi_i8x16(a.val, b.val); + + wasm_v128_store(ptr, v0); + wasm_v128_store(ptr + 16, v1); +} + +inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b, + const v_uint8x16& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,16,0,1,17,0,2,18,0,3,19,0,4,20,0,5); + v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 21,0,6,22,0,7,23,0,8,24,0,9,25,0,10,26); + v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 0,11,27,0,12,28,0,13,29,0,14,30,0,15,31,0); + + v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,16,3,4,17,6,7,18,9,10,19,12,13,20,15); + v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 0,21,2,3,22,5,6,23,8,9,24,11,12,25,14,15); + v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 26,1,2,27,4,5,28,7,8,29,10,11,30,13,14,31); + + wasm_v128_store(ptr, t10); + wasm_v128_store(ptr + 16, t11); + wasm_v128_store(ptr + 32, t12); +} + +inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b, + const v_uint8x16& c, const v_uint8x16& d, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + // a0 a1 a2 a3 .... + // b0 b1 b2 b3 .... + // c0 c1 c2 c3 .... + // d0 d1 d2 d3 .... + v128_t u0 = wasm_unpacklo_i8x16(a.val, c.val); // a0 c0 a1 c1 ... + v128_t u1 = wasm_unpackhi_i8x16(a.val, c.val); // a8 c8 a9 c9 ... + v128_t u2 = wasm_unpacklo_i8x16(b.val, d.val); // b0 d0 b1 d1 ... + v128_t u3 = wasm_unpackhi_i8x16(b.val, d.val); // b8 d8 b9 d9 ... + + v128_t v0 = wasm_unpacklo_i8x16(u0, u2); // a0 b0 c0 d0 ... + v128_t v1 = wasm_unpackhi_i8x16(u0, u2); // a4 b4 c4 d4 ... + v128_t v2 = wasm_unpacklo_i8x16(u1, u3); // a8 b8 c8 d8 ... + v128_t v3 = wasm_unpackhi_i8x16(u1, u3); // a12 b12 c12 d12 ... + + wasm_v128_store(ptr, v0); + wasm_v128_store(ptr + 16, v1); + wasm_v128_store(ptr + 32, v2); + wasm_v128_store(ptr + 48, v3); +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t v0 = wasm_unpacklo_i16x8(a.val, b.val); + v128_t v1 = wasm_unpackhi_i16x8(a.val, b.val); + + wasm_v128_store(ptr, v0); + wasm_v128_store(ptr + 8, v1); +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, + const v_uint16x8& b, const v_uint16x8& c, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,1,16,17,0,0,2,3,18,19,0,0,4,5,20,21); + v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 0,0,6,7,22,23,0,0,8,9,24,25,0,0,10,11); + v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 26,27,0,0,12,13,28,29,0,0,14,15,30,31,0,0); + + v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,2,3,16,17,6,7,8,9,18,19,12,13,14,15); + v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 20,21,2,3,4,5,22,23,8,9,10,11,24,25,14,15); + v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 0,1,26,27,4,5,6,7,28,29,10,11,12,13,30,31); + + wasm_v128_store(ptr, t10); + wasm_v128_store(ptr + 8, t11); + wasm_v128_store(ptr + 16, t12); +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b, + const v_uint16x8& c, const v_uint16x8& d, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + // a0 a1 a2 a3 .... + // b0 b1 b2 b3 .... + // c0 c1 c2 c3 .... + // d0 d1 d2 d3 .... + v128_t u0 = wasm_unpacklo_i16x8(a.val, c.val); // a0 c0 a1 c1 ... + v128_t u1 = wasm_unpackhi_i16x8(a.val, c.val); // a4 c4 a5 c5 ... + v128_t u2 = wasm_unpacklo_i16x8(b.val, d.val); // b0 d0 b1 d1 ... + v128_t u3 = wasm_unpackhi_i16x8(b.val, d.val); // b4 d4 b5 d5 ... + + v128_t v0 = wasm_unpacklo_i16x8(u0, u2); // a0 b0 c0 d0 ... + v128_t v1 = wasm_unpackhi_i16x8(u0, u2); // a2 b2 c2 d2 ... + v128_t v2 = wasm_unpacklo_i16x8(u1, u3); // a4 b4 c4 d4 ... + v128_t v3 = wasm_unpackhi_i16x8(u1, u3); // a6 b6 c6 d6 ... + + wasm_v128_store(ptr, v0); + wasm_v128_store(ptr + 8, v1); + wasm_v128_store(ptr + 16, v2); + wasm_v128_store(ptr + 24, v3); +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t v0 = wasm_unpacklo_i32x4(a.val, b.val); + v128_t v1 = wasm_unpackhi_i32x4(a.val, b.val); + + wasm_v128_store(ptr, v0); + wasm_v128_store(ptr + 4, v1); +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,16,17,18,19,0,0,0,0,4,5,6,7); + v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 20,21,22,23,0,0,0,0,8,9,10,11,24,25,26,27); + v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 0,0,0,0,12,13,14,15,28,29,30,31,0,0,0,0); + + v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,2,3,4,5,6,7,16,17,18,19,12,13,14,15); + v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 0,1,2,3,20,21,22,23,8,9,10,11,12,13,14,15); + v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 24,25,26,27,4,5,6,7,8,9,10,11,28,29,30,31); + + wasm_v128_store(ptr, t10); + wasm_v128_store(ptr + 4, t11); + wasm_v128_store(ptr + 8, t12); +} + +inline void v_store_interleave(unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, const v_uint32x4& d, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v_uint32x4 v0, v1, v2, v3; + v_transpose4x4(a, b, c, d, v0, v1, v2, v3); + + wasm_v128_store(ptr, v0.val); + wasm_v128_store(ptr + 4, v1.val); + wasm_v128_store(ptr + 8, v2.val); + wasm_v128_store(ptr + 12, v3.val); +} + +// 2-channel, float only +inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t v0 = wasm_unpacklo_i32x4(a.val, b.val); + v128_t v1 = wasm_unpackhi_i32x4(a.val, b.val); + + wasm_v128_store(ptr, v0); + wasm_v128_store(ptr + 4, v1); +} + +inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,16,17,18,19,0,0,0,0,4,5,6,7); + v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 20,21,22,23,0,0,0,0,8,9,10,11,24,25,26,27); + v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 0,0,0,0,12,13,14,15,28,29,30,31,0,0,0,0); + + v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,2,3,4,5,6,7,16,17,18,19,12,13,14,15); + v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 0,1,2,3,20,21,22,23,8,9,10,11,12,13,14,15); + v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 24,25,26,27,4,5,6,7,8,9,10,11,28,29,30,31); + + wasm_v128_store(ptr, t10); + wasm_v128_store(ptr + 4, t11); + wasm_v128_store(ptr + 8, t12); +} + +inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v_float32x4 v0, v1, v2, v3; + v_transpose4x4(a, b, c, d, v0, v1, v2, v3); + + wasm_v128_store(ptr, v0.val); + wasm_v128_store(ptr + 4, v1.val); + wasm_v128_store(ptr + 8, v2.val); + wasm_v128_store(ptr + 12, v3.val); +} + +inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t v0 = wasm_unpacklo_i64x2(a.val, b.val); + v128_t v1 = wasm_unpackhi_i64x2(a.val, b.val); + + wasm_v128_store(ptr, v0); + wasm_v128_store(ptr + 2, v1); +} + +inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, + const v_uint64x2& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t v0 = wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23); + v128_t v1 = wasm_v8x16_shuffle(a.val, c.val, 16,17,18,19,20,21,22,23,8,9,10,11,12,13,14,15); + v128_t v2 = wasm_v8x16_shuffle(b.val, c.val, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31); + + wasm_v128_store(ptr, v0); + wasm_v128_store(ptr + 2, v1); + wasm_v128_store(ptr + 4, v2); +} + +inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, + const v_uint64x2& c, const v_uint64x2& d, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t v0 = wasm_unpacklo_i64x2(a.val, b.val); + v128_t v1 = wasm_unpacklo_i64x2(c.val, d.val); + v128_t v2 = wasm_unpackhi_i64x2(a.val, b.val); + v128_t v3 = wasm_unpackhi_i64x2(c.val, d.val); + + wasm_v128_store(ptr, v0); + wasm_v128_store(ptr + 2, v1); + wasm_v128_store(ptr + 4, v2); + wasm_v128_store(ptr + 6, v3); +} + +#define OPENCV_HAL_IMPL_WASM_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1) \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0 ) \ +{ \ + _Tpvec1 a1, b1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ +} \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0 ) \ +{ \ + _Tpvec1 a1, b1, c1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ + c0 = v_reinterpret_as_##suffix0(c1); \ +} \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0, _Tpvec0& d0 ) \ +{ \ + _Tpvec1 a1, b1, c1, d1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1, d1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ + c0 = v_reinterpret_as_##suffix0(c1); \ + d0 = v_reinterpret_as_##suffix0(d1); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + hal::StoreMode mode = hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, mode); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + const _Tpvec0& c0, hal::StoreMode mode = hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, mode); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + const _Tpvec0& c0, const _Tpvec0& d0, \ + hal::StoreMode mode = hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ + _Tpvec1 d1 = v_reinterpret_as_##suffix1(d0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1, mode); \ +} + +OPENCV_HAL_IMPL_WASM_LOADSTORE_INTERLEAVE(v_int8x16, schar, s8, v_uint8x16, uchar, u8) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INTERLEAVE(v_int16x8, short, s16, v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INTERLEAVE(v_int32x4, int, s32, v_uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INTERLEAVE(v_int64x2, int64, s64, v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INTERLEAVE(v_float64x2, double, f64, v_uint64x2, uint64, u64) + +inline v_float32x4 v_cvt_f32(const v_int32x4& a) +{ + return v_float32x4(wasm_f32x4_convert_i32x4(a.val)); +} + +inline v_float32x4 v_cvt_f32(const v_float64x2& a) +{ + fallback::v_float64x2 a_(a); + return fallback::v_cvt_f32(a_); +} + +inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) +{ + fallback::v_float64x2 a_(a), b_(b); + return fallback::v_cvt_f32(a_, b_); +} + +inline v_float64x2 v_cvt_f64(const v_int32x4& a) +{ +#ifdef __wasm_unimplemented_simd128__ + v128_t p = v128_cvti32x4_i64x2(a.val); + return v_float64x2(wasm_f64x2_convert_i64x2(p)); +#else + fallback::v_int32x4 a_(a); + return fallback::v_cvt_f64(a_); +#endif +} + +inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) +{ +#ifdef __wasm_unimplemented_simd128__ + v128_t p = v128_cvti32x4_i64x2_high(a.val); + return v_float64x2(wasm_f64x2_convert_i64x2(p)); +#else + fallback::v_int32x4 a_(a); + return fallback::v_cvt_f64_high(a_); +#endif +} + +inline v_float64x2 v_cvt_f64(const v_float32x4& a) +{ + fallback::v_float32x4 a_(a); + return fallback::v_cvt_f64(a_); +} + +inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) +{ + fallback::v_float32x4 a_(a); + return fallback::v_cvt_f64_high(a_); +} + +inline v_float64x2 v_cvt_f64(const v_int64x2& a) +{ +#ifdef __wasm_unimplemented_simd128__ + return v_float64x2(wasm_f64x2_convert_i64x2(a.val)); +#else + fallback::v_int64x2 a_(a); + return fallback::v_cvt_f64(a_); +#endif +} + +////////////// Lookup table access //////////////////// + +inline v_int8x16 v_lut(const schar* tab, const int* idx) +{ + return v_int8x16(tab[idx[0]], tab[idx[1]], tab[idx[ 2]], tab[idx[ 3]], tab[idx[ 4]], tab[idx[ 5]], tab[idx[ 6]], tab[idx[ 7]], + tab[idx[8]], tab[idx[9]], tab[idx[10]], tab[idx[11]], tab[idx[12]], tab[idx[13]], tab[idx[14]], tab[idx[15]]); +} +inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx) +{ + return v_int8x16(tab[idx[0]], tab[idx[0]+1], tab[idx[1]], tab[idx[1]+1], tab[idx[2]], tab[idx[2]+1], tab[idx[3]], tab[idx[3]+1], + tab[idx[4]], tab[idx[4]+1], tab[idx[5]], tab[idx[5]+1], tab[idx[6]], tab[idx[6]+1], tab[idx[7]], tab[idx[7]+1]); +} +inline v_int8x16 v_lut_quads(const schar* tab, const int* idx) +{ + return v_int8x16(tab[idx[0]], tab[idx[0]+1], tab[idx[0]+2], tab[idx[0]+3], tab[idx[1]], tab[idx[1]+1], tab[idx[1]+2], tab[idx[1]+3], + tab[idx[2]], tab[idx[2]+1], tab[idx[2]+2], tab[idx[2]+3], tab[idx[3]], tab[idx[3]+1], tab[idx[3]+2], tab[idx[3]+3]); +} +inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((const schar *)tab, idx)); } +inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((const schar *)tab, idx)); } +inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((const schar *)tab, idx)); } + +inline v_int16x8 v_lut(const short* tab, const int* idx) +{ + return v_int16x8(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]], + tab[idx[4]], tab[idx[5]], tab[idx[6]], tab[idx[7]]); +} +inline v_int16x8 v_lut_pairs(const short* tab, const int* idx) +{ + return v_int16x8(tab[idx[0]], tab[idx[0]+1], tab[idx[1]], tab[idx[1]+1], + tab[idx[2]], tab[idx[2]+1], tab[idx[3]], tab[idx[3]+1]); +} +inline v_int16x8 v_lut_quads(const short* tab, const int* idx) +{ + return v_int16x8(tab[idx[0]], tab[idx[0]+1], tab[idx[0]+2], tab[idx[0]+3], + tab[idx[1]], tab[idx[1]+1], tab[idx[1]+2], tab[idx[1]+3]); +} +inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((const short *)tab, idx)); } +inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((const short *)tab, idx)); } +inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((const short *)tab, idx)); } + +inline v_int32x4 v_lut(const int* tab, const int* idx) +{ + return v_int32x4(tab[idx[0]], tab[idx[1]], + tab[idx[2]], tab[idx[3]]); +} +inline v_int32x4 v_lut_pairs(const int* tab, const int* idx) +{ + return v_int32x4(tab[idx[0]], tab[idx[0]+1], + tab[idx[1]], tab[idx[1]+1]); +} +inline v_int32x4 v_lut_quads(const int* tab, const int* idx) +{ + return v_int32x4(wasm_v128_load(tab + idx[0])); +} +inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((const int *)tab, idx)); } +inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((const int *)tab, idx)); } +inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((const int *)tab, idx)); } + +inline v_int64x2 v_lut(const int64_t* tab, const int* idx) +{ + return v_int64x2(tab[idx[0]], tab[idx[1]]); +} +inline v_int64x2 v_lut_pairs(const int64_t* tab, const int* idx) +{ + return v_int64x2(wasm_v128_load(tab + idx[0])); +} +inline v_uint64x2 v_lut(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); } +inline v_uint64x2 v_lut_pairs(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); } + +inline v_float32x4 v_lut(const float* tab, const int* idx) +{ + return v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); +} +inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) { return v_reinterpret_as_f32(v_lut_pairs((const int *)tab, idx)); } +inline v_float32x4 v_lut_quads(const float* tab, const int* idx) { return v_reinterpret_as_f32(v_lut_quads((const int *)tab, idx)); } + +inline v_float64x2 v_lut(const double* tab, const int* idx) +{ + return v_float64x2(tab[idx[0]], tab[idx[1]]); +} +inline v_float64x2 v_lut_pairs(const double* tab, const int* idx) +{ + return v_float64x2(wasm_v128_load(tab + idx[0])); +} + +inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) +{ + return v_int32x4(tab[wasm_i32x4_extract_lane(idxvec.val, 0)], + tab[wasm_i32x4_extract_lane(idxvec.val, 1)], + tab[wasm_i32x4_extract_lane(idxvec.val, 2)], + tab[wasm_i32x4_extract_lane(idxvec.val, 3)]); +} + +inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) +{ + return v_reinterpret_as_u32(v_lut((const int *)tab, idxvec)); +} + +inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) +{ + return v_float32x4(tab[wasm_i32x4_extract_lane(idxvec.val, 0)], + tab[wasm_i32x4_extract_lane(idxvec.val, 1)], + tab[wasm_i32x4_extract_lane(idxvec.val, 2)], + tab[wasm_i32x4_extract_lane(idxvec.val, 3)]); +} + +inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) +{ + return v_float64x2(tab[wasm_i32x4_extract_lane(idxvec.val, 0)], + tab[wasm_i32x4_extract_lane(idxvec.val, 1)]); +} + +// loads pairs from the table and deinterleaves them, e.g. returns: +// x = (tab[idxvec[0], tab[idxvec[1]], tab[idxvec[2]], tab[idxvec[3]]), +// y = (tab[idxvec[0]+1], tab[idxvec[1]+1], tab[idxvec[2]+1], tab[idxvec[3]+1]) +// note that the indices are float's indices, not the float-pair indices. +// in theory, this function can be used to implement bilinear interpolation, +// when idxvec are the offsets within the image. +inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y) +{ + x = v_float32x4(tab[wasm_i32x4_extract_lane(idxvec.val, 0)], + tab[wasm_i32x4_extract_lane(idxvec.val, 1)], + tab[wasm_i32x4_extract_lane(idxvec.val, 2)], + tab[wasm_i32x4_extract_lane(idxvec.val, 3)]); + y = v_float32x4(tab[wasm_i32x4_extract_lane(idxvec.val, 0)+1], + tab[wasm_i32x4_extract_lane(idxvec.val, 1)+1], + tab[wasm_i32x4_extract_lane(idxvec.val, 2)+1], + tab[wasm_i32x4_extract_lane(idxvec.val, 3)+1]); +} + +inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y) +{ + v128_t xy0 = wasm_v128_load(tab + wasm_i32x4_extract_lane(idxvec.val, 0)); + v128_t xy1 = wasm_v128_load(tab + wasm_i32x4_extract_lane(idxvec.val, 1)); + x.val = wasm_unpacklo_i64x2(xy0, xy1); + y.val = wasm_unpacklo_i64x2(xy0, xy1); +} + +inline v_int8x16 v_interleave_pairs(const v_int8x16& vec) +{ + return v_int8x16(wasm_v8x16_shuffle(vec.val, vec.val, 0,2,1,3,4,6,5,7,8,10,9,11,12,14,13,15)); +} +inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); } +inline v_int8x16 v_interleave_quads(const v_int8x16& vec) +{ + return v_int8x16(wasm_v8x16_shuffle(vec.val, vec.val, 0,4,1,5,2,6,3,7,8,12,9,13,10,14,11,15)); +} +inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_interleave_pairs(const v_int16x8& vec) +{ + return v_int16x8(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,4,5,2,3,6,7,8,9,12,13,10,11,14,15)); +} +inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); } +inline v_int16x8 v_interleave_quads(const v_int16x8& vec) +{ + return v_int16x8(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15)); +} +inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); } + +inline v_int32x4 v_interleave_pairs(const v_int32x4& vec) +{ + return v_int32x4(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15)); +} +inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } +inline v_float32x4 v_interleave_pairs(const v_float32x4& vec) +{ + return v_float32x4(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15)); +} + +inline v_int8x16 v_pack_triplets(const v_int8x16& vec) +{ + return v_int8x16(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,4,5,6,8,9,10,12,13,14,16,16,16,16)); +} +inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_pack_triplets(const v_int16x8& vec) +{ + return v_int16x8(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,3,4,5,8,9,10,11,12,13,14,15,6,7)); +} +inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); } + +inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; } +inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; } +inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; } + +template +inline typename _Tp::lane_type v_extract_n(const _Tp& a) +{ + return v_rotate_right(a).get0(); +} + +template +inline v_uint32x4 v_broadcast_element(const v_uint32x4& a) +{ + return v_setall_u32(v_extract_n(a)); +} +template +inline v_int32x4 v_broadcast_element(const v_int32x4& a) +{ + return v_setall_s32(v_extract_n(a)); +} +template +inline v_float32x4 v_broadcast_element(const v_float32x4& a) +{ + return v_setall_f32(v_extract_n(a)); +} + + +////////////// FP16 support /////////////////////////// + +inline v_float32x4 v_load_expand(const float16_t* ptr) +{ + return fallback::v_load_expand(ptr); +} + +inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +{ + fallback::v_float32x4 v_(v); + fallback::v_pack_store(ptr, v_); +} + +inline void v_cleanup() {} + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END + +//! @endcond + +} + +#endif diff --git a/IPL/include/opencv/opencv2/core/hal/msa_macros.h b/IPL/include/opencv/opencv2/core/hal/msa_macros.h new file mode 100644 index 0000000..bd6ddb1 --- /dev/null +++ b/IPL/include/opencv/opencv2/core/hal/msa_macros.h @@ -0,0 +1,1558 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_HAL_MSA_MACROS_H +#define OPENCV_CORE_HAL_MSA_MACROS_H + +#ifdef __mips_msa +#include "msa.h" +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* Define 64 bits vector types */ +typedef signed char v8i8 __attribute__ ((vector_size(8), aligned(8))); +typedef unsigned char v8u8 __attribute__ ((vector_size(8), aligned(8))); +typedef short v4i16 __attribute__ ((vector_size(8), aligned(8))); +typedef unsigned short v4u16 __attribute__ ((vector_size(8), aligned(8))); +typedef int v2i32 __attribute__ ((vector_size(8), aligned(8))); +typedef unsigned int v2u32 __attribute__ ((vector_size(8), aligned(8))); +typedef long long v1i64 __attribute__ ((vector_size(8), aligned(8))); +typedef unsigned long long v1u64 __attribute__ ((vector_size(8), aligned(8))); +typedef float v2f32 __attribute__ ((vector_size(8), aligned(8))); +typedef double v1f64 __attribute__ ((vector_size(8), aligned(8))); + + +/* Load values from the given memory a 64-bit vector. */ +#define msa_ld1_s8(__a) (*((v8i8*)(__a))) +#define msa_ld1_s16(__a) (*((v4i16*)(__a))) +#define msa_ld1_s32(__a) (*((v2i32*)(__a))) +#define msa_ld1_s64(__a) (*((v1i64*)(__a))) +#define msa_ld1_u8(__a) (*((v8u8*)(__a))) +#define msa_ld1_u16(__a) (*((v4u16*)(__a))) +#define msa_ld1_u32(__a) (*((v2u32*)(__a))) +#define msa_ld1_u64(__a) (*((v1u64*)(__a))) +#define msa_ld1_f32(__a) (*((v2f32*)(__a))) +#define msa_ld1_f64(__a) (*((v1f64*)(__a))) + +/* Load values from the given memory address to a 128-bit vector */ +#define msa_ld1q_s8(__a) ((v16i8)__builtin_msa_ld_b(__a, 0)) +#define msa_ld1q_s16(__a) ((v8i16)__builtin_msa_ld_h(__a, 0)) +#define msa_ld1q_s32(__a) ((v4i32)__builtin_msa_ld_w(__a, 0)) +#define msa_ld1q_s64(__a) ((v2i64)__builtin_msa_ld_d(__a, 0)) +#define msa_ld1q_u8(__a) ((v16u8)__builtin_msa_ld_b(__a, 0)) +#define msa_ld1q_u16(__a) ((v8u16)__builtin_msa_ld_h(__a, 0)) +#define msa_ld1q_u32(__a) ((v4u32)__builtin_msa_ld_w(__a, 0)) +#define msa_ld1q_u64(__a) ((v2u64)__builtin_msa_ld_d(__a, 0)) +#define msa_ld1q_f32(__a) ((v4f32)__builtin_msa_ld_w(__a, 0)) +#define msa_ld1q_f64(__a) ((v2f64)__builtin_msa_ld_d(__a, 0)) + +/* Store 64bits vector elements values to the given memory address. */ +#define msa_st1_s8(__a, __b) (*((v8i8*)(__a)) = __b) +#define msa_st1_s16(__a, __b) (*((v4i16*)(__a)) = __b) +#define msa_st1_s32(__a, __b) (*((v2i32*)(__a)) = __b) +#define msa_st1_s64(__a, __b) (*((v1i64*)(__a)) = __b) +#define msa_st1_u8(__a, __b) (*((v8u8*)(__a)) = __b) +#define msa_st1_u16(__a, __b) (*((v4u16*)(__a)) = __b) +#define msa_st1_u32(__a, __b) (*((v2u32*)(__a)) = __b) +#define msa_st1_u64(__a, __b) (*((v1u64*)(__a)) = __b) +#define msa_st1_f32(__a, __b) (*((v2f32*)(__a)) = __b) +#define msa_st1_f64(__a, __b) (*((v1f64*)(__a)) = __b) + +/* Store the values of elements in the 128 bits vector __a to the given memory address __a. */ +#define msa_st1q_s8(__a, __b) (__builtin_msa_st_b((v16i8)(__b), __a, 0)) +#define msa_st1q_s16(__a, __b) (__builtin_msa_st_h((v8i16)(__b), __a, 0)) +#define msa_st1q_s32(__a, __b) (__builtin_msa_st_w((v4i32)(__b), __a, 0)) +#define msa_st1q_s64(__a, __b) (__builtin_msa_st_d((v2i64)(__b), __a, 0)) +#define msa_st1q_u8(__a, __b) (__builtin_msa_st_b((v16i8)(__b), __a, 0)) +#define msa_st1q_u16(__a, __b) (__builtin_msa_st_h((v8i16)(__b), __a, 0)) +#define msa_st1q_u32(__a, __b) (__builtin_msa_st_w((v4i32)(__b), __a, 0)) +#define msa_st1q_u64(__a, __b) (__builtin_msa_st_d((v2i64)(__b), __a, 0)) +#define msa_st1q_f32(__a, __b) (__builtin_msa_st_w((v4i32)(__b), __a, 0)) +#define msa_st1q_f64(__a, __b) (__builtin_msa_st_d((v2i64)(__b), __a, 0)) + +/* Store the value of the element with the index __c in vector __a to the given memory address __a. */ +#define msa_st1_lane_s8(__a, __b, __c) (*((int8_t*)(__a)) = __b[__c]) +#define msa_st1_lane_s16(__a, __b, __c) (*((int16_t*)(__a)) = __b[__c]) +#define msa_st1_lane_s32(__a, __b, __c) (*((int32_t*)(__a)) = __b[__c]) +#define msa_st1_lane_s64(__a, __b, __c) (*((int64_t*)(__a)) = __b[__c]) +#define msa_st1_lane_u8(__a, __b, __c) (*((uint8_t*)(__a)) = __b[__c]) +#define msa_st1_lane_u16(__a, __b, __c) (*((uint16_t*)(__a)) = __b[__c]) +#define msa_st1_lane_u32(__a, __b, __c) (*((uint32_t*)(__a)) = __b[__c]) +#define msa_st1_lane_u64(__a, __b, __c) (*((uint64_t*)(__a)) = __b[__c]) +#define msa_st1_lane_f32(__a, __b, __c) (*((float*)(__a)) = __b[__c]) +#define msa_st1_lane_f64(__a, __b, __c) (*((double*)(__a)) = __b[__c]) +#define msa_st1q_lane_s8(__a, __b, __c) (*((int8_t*)(__a)) = (int8_t)__builtin_msa_copy_s_b(__b, __c)) +#define msa_st1q_lane_s16(__a, __b, __c) (*((int16_t*)(__a)) = (int16_t)__builtin_msa_copy_s_h(__b, __c)) +#define msa_st1q_lane_s32(__a, __b, __c) (*((int32_t*)(__a)) = __builtin_msa_copy_s_w(__b, __c)) +#define msa_st1q_lane_s64(__a, __b, __c) (*((int64_t*)(__a)) = __builtin_msa_copy_s_d(__b, __c)) +#define msa_st1q_lane_u8(__a, __b, __c) (*((uint8_t*)(__a)) = (uint8_t)__builtin_msa_copy_u_b((v16i8)(__b), __c)) +#define msa_st1q_lane_u16(__a, __b, __c) (*((uint16_t*)(__a)) = (uint16_t)__builtin_msa_copy_u_h((v8i16)(__b), __c)) +#define msa_st1q_lane_u32(__a, __b, __c) (*((uint32_t*)(__a)) = __builtin_msa_copy_u_w((v4i32)(__b), __c)) +#define msa_st1q_lane_u64(__a, __b, __c) (*((uint64_t*)(__a)) = __builtin_msa_copy_u_d((v2i64)(__b), __c)) +#define msa_st1q_lane_f32(__a, __b, __c) (*((float*)(__a)) = __b[__c]) +#define msa_st1q_lane_f64(__a, __b, __c) (*((double*)(__a)) = __b[__c]) + +/* Duplicate elements for 64-bit doubleword vectors */ +#define msa_dup_n_s8(__a) ((v8i8)__builtin_msa_copy_s_d((v2i64)__builtin_msa_fill_b((int32_t)(__a)), 0)) +#define msa_dup_n_s16(__a) ((v4i16)__builtin_msa_copy_s_d((v2i64)__builtin_msa_fill_h((int32_t)(__a)), 0)) +#define msa_dup_n_s32(__a) ((v2i32){__a, __a}) +#define msa_dup_n_s64(__a) ((v1i64){__a}) +#define msa_dup_n_u8(__a) ((v8u8)__builtin_msa_copy_u_d((v2i64)__builtin_msa_fill_b((int32_t)(__a)), 0)) +#define msa_dup_n_u16(__a) ((v4u16)__builtin_msa_copy_u_d((v2i64)__builtin_msa_fill_h((int32_t)(__a)), 0)) +#define msa_dup_n_u32(__a) ((v2u32){__a, __a}) +#define msa_dup_n_u64(__a) ((v1u64){__a}) +#define msa_dup_n_f32(__a) ((v2f32){__a, __a}) +#define msa_dup_n_f64(__a) ((v1f64){__a}) + +/* Duplicate elements for 128-bit quadword vectors */ +#define msa_dupq_n_s8(__a) (__builtin_msa_fill_b((int32_t)(__a))) +#define msa_dupq_n_s16(__a) (__builtin_msa_fill_h((int32_t)(__a))) +#define msa_dupq_n_s32(__a) (__builtin_msa_fill_w((int32_t)(__a))) +#define msa_dupq_n_s64(__a) (__builtin_msa_fill_d((int64_t)(__a))) +#define msa_dupq_n_u8(__a) ((v16u8)__builtin_msa_fill_b((int32_t)(__a))) +#define msa_dupq_n_u16(__a) ((v8u16)__builtin_msa_fill_h((int32_t)(__a))) +#define msa_dupq_n_u32(__a) ((v4u32)__builtin_msa_fill_w((int32_t)(__a))) +#define msa_dupq_n_u64(__a) ((v2u64)__builtin_msa_fill_d((int64_t)(__a))) +#define msa_dupq_n_f32(__a) ((v4f32){__a, __a, __a, __a}) +#define msa_dupq_n_f64(__a) ((v2f64){__a, __a}) +#define msa_dupq_lane_s8(__a, __b) (__builtin_msa_splat_b(__a, __b)) +#define msa_dupq_lane_s16(__a, __b) (__builtin_msa_splat_h(__a, __b)) +#define msa_dupq_lane_s32(__a, __b) (__builtin_msa_splat_w(__a, __b)) +#define msa_dupq_lane_s64(__a, __b) (__builtin_msa_splat_d(__a, __b)) +#define msa_dupq_lane_u8(__a, __b) ((v16u8)__builtin_msa_splat_b((v16i8)(__a), __b)) +#define msa_dupq_lane_u16(__a, __b) ((v8u16)__builtin_msa_splat_h((v8i16)(__a), __b)) +#define msa_dupq_lane_u32(__a, __b) ((v4u32)__builtin_msa_splat_w((v4i32)(__a), __b)) +#define msa_dupq_lane_u64(__a, __b) ((v2u64)__builtin_msa_splat_d((v2i64)(__a), __b)) + +/* Create a 64 bits vector */ +#define msa_create_s8(__a) ((v8i8)((uint64_t)(__a))) +#define msa_create_s16(__a) ((v4i16)((uint64_t)(__a))) +#define msa_create_s32(__a) ((v2i32)((uint64_t)(__a))) +#define msa_create_s64(__a) ((v1i64)((uint64_t)(__a))) +#define msa_create_u8(__a) ((v8u8)((uint64_t)(__a))) +#define msa_create_u16(__a) ((v4u16)((uint64_t)(__a))) +#define msa_create_u32(__a) ((v2u32)((uint64_t)(__a))) +#define msa_create_u64(__a) ((v1u64)((uint64_t)(__a))) +#define msa_create_f32(__a) ((v2f32)((uint64_t)(__a))) +#define msa_create_f64(__a) ((v1f64)((uint64_t)(__a))) + +/* Sign extends or zero extends each element in a 64 bits vector to twice its original length, and places the results in a 128 bits vector. */ +/*Transform v8i8 to v8i16*/ +#define msa_movl_s8(__a) \ +((v8i16){(__a)[0], (__a)[1], (__a)[2], (__a)[3], \ + (__a)[4], (__a)[5], (__a)[6], (__a)[7]}) + +/*Transform v8u8 to v8u16*/ +#define msa_movl_u8(__a) \ +((v8u16){(__a)[0], (__a)[1], (__a)[2], (__a)[3], \ + (__a)[4], (__a)[5], (__a)[6], (__a)[7]}) + +/*Transform v4i16 to v8i16*/ +#define msa_movl_s16(__a) ((v4i32){(__a)[0], (__a)[1], (__a)[2], (__a)[3]}) + +/*Transform v2i32 to v4i32*/ +#define msa_movl_s32(__a) ((v2i64){(__a)[0], (__a)[1]}) + +/*Transform v4u16 to v8u16*/ +#define msa_movl_u16(__a) ((v4u32){(__a)[0], (__a)[1], (__a)[2], (__a)[3]}) + +/*Transform v2u32 to v4u32*/ +#define msa_movl_u32(__a) ((v2u64){(__a)[0], (__a)[1]}) + +/* Copies the least significant half of each element of a 128 bits vector into the corresponding elements of a 64 bits vector. */ +#define msa_movn_s16(__a) \ +({ \ + v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)(__a)); \ + (v8i8)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_movn_s32(__a) \ +({ \ + v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)(__a)); \ + (v4i16)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_movn_s64(__a) \ +({ \ + v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)(__a)); \ + (v2i32)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_movn_u16(__a) \ +({ \ + v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)(__a)); \ + (v8u8)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +#define msa_movn_u32(__a) \ +({ \ + v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)(__a)); \ + (v4u16)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +#define msa_movn_u64(__a) \ +({ \ + v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)(__a)); \ + (v2u32)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +/* qmovn */ +#define msa_qmovn_s16(__a) \ +({ \ + v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_sat_s_h((v8i16)(__a), 7)); \ + (v8i8)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_qmovn_s32(__a) \ +({ \ + v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_sat_s_w((v4i32)(__a), 15)); \ + (v4i16)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_qmovn_s64(__a) \ +({ \ + v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_sat_s_d((v2i64)(__a), 31)); \ + (v2i32)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_qmovn_u16(__a) \ +({ \ + v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_sat_u_h((v8u16)(__a), 7)); \ + (v8u8)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +#define msa_qmovn_u32(__a) \ +({ \ + v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_sat_u_w((v4u32)(__a), 15)); \ + (v4u16)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +#define msa_qmovn_u64(__a) \ +({ \ + v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_sat_u_d((v2u64)(__a), 31)); \ + (v2u32)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +/* qmovun */ +#define msa_qmovun_s16(__a) \ +({ \ + v8i16 __d = __builtin_msa_max_s_h(__builtin_msa_fill_h(0), (v8i16)(__a)); \ + v16i8 __e = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_sat_u_h((v8u16)__d, 7)); \ + (v8u8)__builtin_msa_copy_u_d((v2i64)__e, 0); \ +}) + +#define msa_qmovun_s32(__a) \ +({ \ + v4i32 __d = __builtin_msa_max_s_w(__builtin_msa_fill_w(0), (v4i32)(__a)); \ + v8i16 __e = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_sat_u_w((v4u32)__d, 15)); \ + (v4u16)__builtin_msa_copy_u_d((v2i64)__e, 0); \ +}) + +#define msa_qmovun_s64(__a) \ +({ \ + v2i64 __d = __builtin_msa_max_s_d(__builtin_msa_fill_d(0), (v2i64)(__a)); \ + v4i32 __e = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_sat_u_d((v2u64)__d, 31)); \ + (v2u32)__builtin_msa_copy_u_d((v2i64)__e, 0); \ +}) + +/* Right shift elements in a 128 bits vector by an immediate value, and places the results in a 64 bits vector. */ +#define msa_shrn_n_s16(__a, __b) \ +({ \ + v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_srai_h((v8i16)(__a), (int)(__b))); \ + (v8i8)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_shrn_n_s32(__a, __b) \ +({ \ + v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_srai_w((v4i32)(__a), (int)(__b))); \ + (v4i16)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_shrn_n_s64(__a, __b) \ +({ \ + v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_srai_d((v2i64)(__a), (int)(__b))); \ + (v2i32)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_shrn_n_u16(__a, __b) \ +({ \ + v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_srli_h((v8i16)(__a), (int)(__b))); \ + (v8u8)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +#define msa_shrn_n_u32(__a, __b) \ +({ \ + v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_srli_w((v4i32)(__a), (int)(__b))); \ + (v4u16)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +#define msa_shrn_n_u64(__a, __b) \ +({ \ + v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_srli_d((v2i64)(__a), (int)(__b))); \ + (v2u32)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +/* Right shift elements in a 128 bits vector by an immediate value, and places the results in a 64 bits vector. */ +#define msa_rshrn_n_s16(__a, __b) \ +({ \ + v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_srari_h((v8i16)(__a), (int)__b)); \ + (v8i8)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_rshrn_n_s32(__a, __b) \ +({ \ + v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_srari_w((v4i32)(__a), (int)__b)); \ + (v4i16)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_rshrn_n_s64(__a, __b) \ +({ \ + v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_srari_d((v2i64)(__a), (int)__b)); \ + (v2i32)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_rshrn_n_u16(__a, __b) \ +({ \ + v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_srlri_h((v8i16)(__a), (int)__b)); \ + (v8u8)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +#define msa_rshrn_n_u32(__a, __b) \ +({ \ + v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_srlri_w((v4i32)(__a), (int)__b)); \ + (v4u16)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +#define msa_rshrn_n_u64(__a, __b) \ +({ \ + v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_srlri_d((v2i64)(__a), (int)__b)); \ + (v2u32)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +/* Right shift elements in a 128 bits vector by an immediate value, saturate the results and them in a 64 bits vector. */ +#define msa_qrshrn_n_s16(__a, __b) \ +({ \ + v8i16 __d = __builtin_msa_sat_s_h(__builtin_msa_srari_h((v8i16)(__a), (int)(__b)), 7); \ + v16i8 __e = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__d); \ + (v8i8)__builtin_msa_copy_s_d((v2i64)__e, 0); \ +}) + +#define msa_qrshrn_n_s32(__a, __b) \ +({ \ + v4i32 __d = __builtin_msa_sat_s_w(__builtin_msa_srari_w((v4i32)(__a), (int)(__b)), 15); \ + v8i16 __e = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__d); \ + (v4i16)__builtin_msa_copy_s_d((v2i64)__e, 0); \ +}) + +#define msa_qrshrn_n_s64(__a, __b) \ +({ \ + v2i64 __d = __builtin_msa_sat_s_d(__builtin_msa_srari_d((v2i64)(__a), (int)(__b)), 31); \ + v4i32 __e = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__d); \ + (v2i32)__builtin_msa_copy_s_d((v2i64)__e, 0); \ +}) + +#define msa_qrshrn_n_u16(__a, __b) \ +({ \ + v8u16 __d = __builtin_msa_sat_u_h((v8u16)__builtin_msa_srlri_h((v8i16)(__a), (int)(__b)), 7); \ + v16i8 __e = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__d); \ + (v8u8)__builtin_msa_copy_u_d((v2i64)__e, 0); \ +}) + +#define msa_qrshrn_n_u32(__a, __b) \ +({ \ + v4u32 __d = __builtin_msa_sat_u_w((v4u32)__builtin_msa_srlri_w((v4i32)(__a), (int)(__b)), 15); \ + v8i16 __e = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__d); \ + (v4u16)__builtin_msa_copy_u_d((v2i64)__e, 0); \ +}) + +#define msa_qrshrn_n_u64(__a, __b) \ +({ \ + v2u64 __d = __builtin_msa_sat_u_d((v2u64)__builtin_msa_srlri_d((v2i64)(__a), (int)(__b)), 31); \ + v4i32 __e = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__d); \ + (v2u32)__builtin_msa_copy_u_d((v2i64)__e, 0); \ +}) + +/* Right shift elements in a 128 bits vector by an immediate value, saturate the results and them in a 64 bits vector. + Input is signed and output is unsigned. */ +#define msa_qrshrun_n_s16(__a, __b) \ +({ \ + v8i16 __d = __builtin_msa_srlri_h(__builtin_msa_max_s_h(__builtin_msa_fill_h(0), (v8i16)(__a)), (int)(__b)); \ + v16i8 __e = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_sat_u_h((v8u16)__d, 7)); \ + (v8u8)__builtin_msa_copy_u_d((v2i64)__e, 0); \ +}) + +#define msa_qrshrun_n_s32(__a, __b) \ +({ \ + v4i32 __d = __builtin_msa_srlri_w(__builtin_msa_max_s_w(__builtin_msa_fill_w(0), (v4i32)(__a)), (int)(__b)); \ + v8i16 __e = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_sat_u_w((v4u32)__d, 15)); \ + (v4u16)__builtin_msa_copy_u_d((v2i64)__e, 0); \ +}) + +#define msa_qrshrun_n_s64(__a, __b) \ +({ \ + v2i64 __d = __builtin_msa_srlri_d(__builtin_msa_max_s_d(__builtin_msa_fill_d(0), (v2i64)(__a)), (int)(__b)); \ + v4i32 __e = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_sat_u_d((v2u64)__d, 31)); \ + (v2u32)__builtin_msa_copy_u_d((v2i64)__e, 0); \ +}) + +/* pack */ +#define msa_pack_s16(__a, __b) (__builtin_msa_pckev_b((v16i8)(__b), (v16i8)(__a))) +#define msa_pack_s32(__a, __b) (__builtin_msa_pckev_h((v8i16)(__b), (v8i16)(__a))) +#define msa_pack_s64(__a, __b) (__builtin_msa_pckev_w((v4i32)(__b), (v4i32)(__a))) +#define msa_pack_u16(__a, __b) ((v16u8)__builtin_msa_pckev_b((v16i8)(__b), (v16i8)(__a))) +#define msa_pack_u32(__a, __b) ((v8u16)__builtin_msa_pckev_h((v8i16)(__b), (v8i16)(__a))) +#define msa_pack_u64(__a, __b) ((v4u32)__builtin_msa_pckev_w((v4i32)(__b), (v4i32)(__a))) + +/* qpack */ +#define msa_qpack_s16(__a, __b) \ +(__builtin_msa_pckev_b((v16i8)__builtin_msa_sat_s_h((v8i16)(__b), 7), (v16i8)__builtin_msa_sat_s_h((v8i16)(__a), 7))) +#define msa_qpack_s32(__a, __b) \ +(__builtin_msa_pckev_h((v8i16)__builtin_msa_sat_s_w((v4i32)(__b), 15), (v8i16)__builtin_msa_sat_s_w((v4i32)(__a), 15))) +#define msa_qpack_s64(__a, __b) \ +(__builtin_msa_pckev_w((v4i32)__builtin_msa_sat_s_d((v2i64)(__b), 31), (v4i32)__builtin_msa_sat_s_d((v2i64)(__a), 31))) +#define msa_qpack_u16(__a, __b) \ +((v16u8)__builtin_msa_pckev_b((v16i8)__builtin_msa_sat_u_h((v8u16)(__b), 7), (v16i8)__builtin_msa_sat_u_h((v8u16)(__a), 7))) +#define msa_qpack_u32(__a, __b) \ +((v8u16)__builtin_msa_pckev_h((v8i16)__builtin_msa_sat_u_w((v4u32)(__b), 15), (v8i16)__builtin_msa_sat_u_w((v4u32)(__a), 15))) +#define msa_qpack_u64(__a, __b) \ +((v4u32)__builtin_msa_pckev_w((v4i32)__builtin_msa_sat_u_d((v2u64)(__b), 31), (v4i32)__builtin_msa_sat_u_d((v2u64)(__a), 31))) + +/* qpacku */ +#define msa_qpacku_s16(__a, __b) \ +((v16u8)__builtin_msa_pckev_b((v16i8)__builtin_msa_sat_u_h((v8u16)(__builtin_msa_max_s_h(__builtin_msa_fill_h(0), (v8i16)(__b))), 7), \ + (v16i8)__builtin_msa_sat_u_h((v8u16)(__builtin_msa_max_s_h(__builtin_msa_fill_h(0), (v8i16)(__a))), 7))) +#define msa_qpacku_s32(__a, __b) \ +((v8u16)__builtin_msa_pckev_h((v8i16)__builtin_msa_sat_u_w((v4u32)(__builtin_msa_max_s_w(__builtin_msa_fill_w(0), (v4i32)(__b))), 15), \ + (v8i16)__builtin_msa_sat_u_w((v4u32)(__builtin_msa_max_s_w(__builtin_msa_fill_w(0), (v4i32)(__a))), 15))) +#define msa_qpacku_s64(__a, __b) \ +((v4u32)__builtin_msa_pckev_w((v4i32)__builtin_msa_sat_u_d((v2u64)(__builtin_msa_max_s_d(__builtin_msa_fill_d(0), (v2i64)(__b))), 31), \ + (v4i32)__builtin_msa_sat_u_d((v2u64)(__builtin_msa_max_s_d(__builtin_msa_fill_d(0), (v2i64)(__a))), 31))) + +/* packr */ +#define msa_packr_s16(__a, __b, __c) \ +(__builtin_msa_pckev_b((v16i8)__builtin_msa_srai_h((v8i16)(__b), (int)(__c)), (v16i8)__builtin_msa_srai_h((v8i16)(__a), (int)(__c)))) +#define msa_packr_s32(__a, __b, __c) \ +(__builtin_msa_pckev_h((v8i16)__builtin_msa_srai_w((v4i32)(__b), (int)(__c)), (v8i16)__builtin_msa_srai_w((v4i32)(__a), (int)(__c)))) +#define msa_packr_s64(__a, __b, __c) \ +(__builtin_msa_pckev_w((v4i32)__builtin_msa_srai_d((v2i64)(__b), (int)(__c)), (v4i32)__builtin_msa_srai_d((v2i64)(__a), (int)(__c)))) +#define msa_packr_u16(__a, __b, __c) \ +((v16u8)__builtin_msa_pckev_b((v16i8)__builtin_msa_srli_h((v8i16)(__b), (int)(__c)), (v16i8)__builtin_msa_srli_h((v8i16)(__a), (int)(__c)))) +#define msa_packr_u32(__a, __b, __c) \ +((v8u16)__builtin_msa_pckev_h((v8i16)__builtin_msa_srli_w((v4i32)(__b), (int)(__c)), (v8i16)__builtin_msa_srli_w((v4i32)(__a), (int)(__c)))) +#define msa_packr_u64(__a, __b, __c) \ +((v4u32)__builtin_msa_pckev_w((v4i32)__builtin_msa_srli_d((v2i64)(__b), (int)(__c)), (v4i32)__builtin_msa_srli_d((v2i64)(__a), (int)(__c)))) + +/* rpackr */ +#define msa_rpackr_s16(__a, __b, __c) \ +(__builtin_msa_pckev_b((v16i8)__builtin_msa_srari_h((v8i16)(__b), (int)(__c)), (v16i8)__builtin_msa_srari_h((v8i16)(__a), (int)(__c)))) +#define msa_rpackr_s32(__a, __b, __c) \ +(__builtin_msa_pckev_h((v8i16)__builtin_msa_srari_w((v4i32)(__b), (int)(__c)), (v8i16)__builtin_msa_srari_w((v4i32)(__a), (int)(__c)))) +#define msa_rpackr_s64(__a, __b, __c) \ +(__builtin_msa_pckev_w((v4i32)__builtin_msa_srari_d((v2i64)(__b), (int)(__c)), (v4i32)__builtin_msa_srari_d((v2i64)(__a), (int)(__c)))) +#define msa_rpackr_u16(__a, __b, __c) \ +((v16u8)__builtin_msa_pckev_b((v16i8)__builtin_msa_srlri_h((v8i16)(__b), (int)(__c)), (v16i8)__builtin_msa_srlri_h((v8i16)(__a), (int)(__c)))) +#define msa_rpackr_u32(__a, __b, __c) \ +((v8u16)__builtin_msa_pckev_h((v8i16)__builtin_msa_srlri_w((v4i32)(__b), (int)(__c)), (v8i16)__builtin_msa_srlri_w((v4i32)(__a), (int)(__c)))) +#define msa_rpackr_u64(__a, __b, __c) \ +((v4u32)__builtin_msa_pckev_w((v4i32)__builtin_msa_srlri_d((v2i64)(__b), (int)(__c)), (v4i32)__builtin_msa_srlri_d((v2i64)(__a), (int)(__c)))) + +/* qrpackr */ +#define msa_qrpackr_s16(__a, __b, __c) \ +(__builtin_msa_pckev_b((v16i8)__builtin_msa_sat_s_h(__builtin_msa_srari_h((v8i16)(__b), (int)(__c)), 7), \ + (v16i8)__builtin_msa_sat_s_h(__builtin_msa_srari_h((v8i16)(__a), (int)(__c)), 7))) +#define msa_qrpackr_s32(__a, __b, __c) \ +(__builtin_msa_pckev_h((v8i16)__builtin_msa_sat_s_w(__builtin_msa_srari_w((v4i32)(__b), (int)(__c)), 15), \ + (v8i16)__builtin_msa_sat_s_w(__builtin_msa_srari_w((v4i32)(__a), (int)(__c)), 15))) +#define msa_qrpackr_s64(__a, __b, __c) \ +(__builtin_msa_pckev_w((v4i32)__builtin_msa_sat_s_d(__builtin_msa_srari_d((v2i64)(__b), (int)(__c)), 31), \ + (v4i32)__builtin_msa_sat_s_d(__builtin_msa_srari_d((v2i64)(__a), (int)(__c)), 31))) +#define msa_qrpackr_u16(__a, __b, __c) \ +((v16u8)__builtin_msa_pckev_b((v16i8)__builtin_msa_sat_u_h((v8u16)__builtin_msa_srlri_h((v8i16)(__b), (int)(__c)), 7), \ + (v16i8)__builtin_msa_sat_u_h((v8u16)__builtin_msa_srlri_h((v8i16)(__a), (int)(__c)), 7))) +#define msa_qrpackr_u32(__a, __b, __c) \ +((v8u16)__builtin_msa_pckev_h((v8i16)__builtin_msa_sat_u_w((v4u32)__builtin_msa_srlri_w((v4i32)(__b), (int)(__c)), 15), \ + (v8i16)__builtin_msa_sat_u_w((v4u32)__builtin_msa_srlri_w((v4i32)(__a), (int)(__c)), 15))) +#define msa_qrpackr_u64(__a, __b, __c) \ +((v4u32)__builtin_msa_pckev_w((v4i32)__builtin_msa_sat_u_d((v2u64)__builtin_msa_srlri_d((v2i64)(__b), (int)(__c)), 31), \ + (v4i32)__builtin_msa_sat_u_d((v2u64)__builtin_msa_srlri_d((v2i64)(__a), (int)(__c)), 31))) + +/* qrpackru */ +#define msa_qrpackru_s16(__a, __b, __c) \ +({ \ + v8i16 __d = __builtin_msa_srlri_h(__builtin_msa_max_s_h(__builtin_msa_fill_h(0), (v8i16)(__a)), (int)(__c)); \ + v8i16 __e = __builtin_msa_srlri_h(__builtin_msa_max_s_h(__builtin_msa_fill_h(0), (v8i16)(__b)), (int)(__c)); \ + (v16u8)__builtin_msa_pckev_b((v16i8)__builtin_msa_sat_u_h((v8u16)__e, 7), (v16i8)__builtin_msa_sat_u_h((v8u16)__d, 7)); \ +}) + +#define msa_qrpackru_s32(__a, __b, __c) \ +({ \ + v4i32 __d = __builtin_msa_srlri_w(__builtin_msa_max_s_w(__builtin_msa_fill_w(0), (v4i32)(__a)), (int)(__c)); \ + v4i32 __e = __builtin_msa_srlri_w(__builtin_msa_max_s_w(__builtin_msa_fill_w(0), (v4i32)(__b)), (int)(__c)); \ + (v8u16)__builtin_msa_pckev_h((v8i16)__builtin_msa_sat_u_w((v4u32)__e, 15), (v8i16)__builtin_msa_sat_u_w((v4u32)__d, 15)); \ +}) + +#define msa_qrpackru_s64(__a, __b, __c) \ +({ \ + v2i64 __d = __builtin_msa_srlri_d(__builtin_msa_max_s_d(__builtin_msa_fill_d(0), (v2i64)(__a)), (int)(__c)); \ + v2i64 __e = __builtin_msa_srlri_d(__builtin_msa_max_s_d(__builtin_msa_fill_d(0), (v2i64)(__b)), (int)(__c)); \ + (v4u32)__builtin_msa_pckev_w((v4i32)__builtin_msa_sat_u_d((v2u64)__e, 31), (v4i32)__builtin_msa_sat_u_d((v2u64)__d, 31)); \ +}) + +/* Minimum values between corresponding elements in the two vectors are written to the returned vector. */ +#define msa_minq_s8(__a, __b) (__builtin_msa_min_s_b(__a, __b)) +#define msa_minq_s16(__a, __b) (__builtin_msa_min_s_h(__a, __b)) +#define msa_minq_s32(__a, __b) (__builtin_msa_min_s_w(__a, __b)) +#define msa_minq_s64(__a, __b) (__builtin_msa_min_s_d(__a, __b)) +#define msa_minq_u8(__a, __b) ((v16u8)__builtin_msa_min_u_b(__a, __b)) +#define msa_minq_u16(__a, __b) ((v8u16)__builtin_msa_min_u_h(__a, __b)) +#define msa_minq_u32(__a, __b) ((v4u32)__builtin_msa_min_u_w(__a, __b)) +#define msa_minq_u64(__a, __b) ((v2u64)__builtin_msa_min_u_d(__a, __b)) +#define msa_minq_f32(__a, __b) (__builtin_msa_fmin_w(__a, __b)) +#define msa_minq_f64(__a, __b) (__builtin_msa_fmin_d(__a, __b)) + +/* Maximum values between corresponding elements in the two vectors are written to the returned vector. */ +#define msa_maxq_s8(__a, __b) (__builtin_msa_max_s_b(__a, __b)) +#define msa_maxq_s16(__a, __b) (__builtin_msa_max_s_h(__a, __b)) +#define msa_maxq_s32(__a, __b) (__builtin_msa_max_s_w(__a, __b)) +#define msa_maxq_s64(__a, __b) (__builtin_msa_max_s_d(__a, __b)) +#define msa_maxq_u8(__a, __b) ((v16u8)__builtin_msa_max_u_b(__a, __b)) +#define msa_maxq_u16(__a, __b) ((v8u16)__builtin_msa_max_u_h(__a, __b)) +#define msa_maxq_u32(__a, __b) ((v4u32)__builtin_msa_max_u_w(__a, __b)) +#define msa_maxq_u64(__a, __b) ((v2u64)__builtin_msa_max_u_d(__a, __b)) +#define msa_maxq_f32(__a, __b) (__builtin_msa_fmax_w(__a, __b)) +#define msa_maxq_f64(__a, __b) (__builtin_msa_fmax_d(__a, __b)) + +/* Vector type reinterpretion */ +#define MSA_TPV_REINTERPRET(_Tpv, Vec) ((_Tpv)(Vec)) + +/* Add the odd elements in vector __a with the even elements in vector __b to double width elements in the returned vector. */ +/* v8i16 msa_hadd_s16 ((v16i8)__a, (v16i8)__b) */ +#define msa_hadd_s16(__a, __b) (__builtin_msa_hadd_s_h((v16i8)(__a), (v16i8)(__b))) +/* v4i32 msa_hadd_s32 ((v8i16)__a, (v8i16)__b) */ +#define msa_hadd_s32(__a, __b) (__builtin_msa_hadd_s_w((v8i16)(__a), (v8i16)(__b))) +/* v2i64 msa_hadd_s64 ((v4i32)__a, (v4i32)__b) */ +#define msa_hadd_s64(__a, __b) (__builtin_msa_hadd_s_d((v4i32)(__a), (v4i32)(__b))) + +/* Copy even elements in __a to the left half and even elements in __b to the right half and return the result vector. */ +#define msa_pckev_s8(__a, __b) (__builtin_msa_pckev_b((v16i8)(__a), (v16i8)(__b))) +#define msa_pckev_s16(__a, __b) (__builtin_msa_pckev_h((v8i16)(__a), (v8i16)(__b))) +#define msa_pckev_s32(__a, __b) (__builtin_msa_pckev_w((v4i32)(__a), (v4i32)(__b))) +#define msa_pckev_s64(__a, __b) (__builtin_msa_pckev_d((v2i64)(__a), (v2i64)(__b))) + +/* Copy even elements in __a to the left half and even elements in __b to the right half and return the result vector. */ +#define msa_pckod_s8(__a, __b) (__builtin_msa_pckod_b((v16i8)(__a), (v16i8)(__b))) +#define msa_pckod_s16(__a, __b) (__builtin_msa_pckod_h((v8i16)(__a), (v8i16)(__b))) +#define msa_pckod_s32(__a, __b) (__builtin_msa_pckod_w((v4i32)(__a), (v4i32)(__b))) +#define msa_pckod_s64(__a, __b) (__builtin_msa_pckod_d((v2i64)(__a), (v2i64)(__b))) + +#ifdef _MIPSEB +#define LANE_IMM0_1(x) (0b1 - ((x) & 0b1)) +#define LANE_IMM0_3(x) (0b11 - ((x) & 0b11)) +#define LANE_IMM0_7(x) (0b111 - ((x) & 0b111)) +#define LANE_IMM0_15(x) (0b1111 - ((x) & 0b1111)) +#else +#define LANE_IMM0_1(x) ((x) & 0b1) +#define LANE_IMM0_3(x) ((x) & 0b11) +#define LANE_IMM0_7(x) ((x) & 0b111) +#define LANE_IMM0_15(x) ((x) & 0b1111) +#endif + +#define msa_get_lane_u8(__a, __b) ((uint8_t)(__a)[LANE_IMM0_7(__b)]) +#define msa_get_lane_s8(__a, __b) ((int8_t)(__a)[LANE_IMM0_7(__b)]) +#define msa_get_lane_u16(__a, __b) ((uint16_t)(__a)[LANE_IMM0_3(__b)]) +#define msa_get_lane_s16(__a, __b) ((int16_t)(__a)[LANE_IMM0_3(__b)]) +#define msa_get_lane_u32(__a, __b) ((uint32_t)(__a)[LANE_IMM0_1(__b)]) +#define msa_get_lane_s32(__a, __b) ((int32_t)(__a)[LANE_IMM0_1(__b)]) +#define msa_get_lane_f32(__a, __b) ((float)(__a)[LANE_IMM0_3(__b)]) +#define msa_get_lane_s64(__a, __b) ((int64_t)(__a)[LANE_IMM0_1(__b)]) +#define msa_get_lane_u64(__a, __b) ((uint64_t)(__a)[LANE_IMM0_1(__b)]) +#define msa_get_lane_f64(__a, __b) ((double)(__a)[LANE_IMM0_1(__b)]) +#define msa_getq_lane_u8(__a, imm0_15) ((uint8_t)__builtin_msa_copy_u_b((v16i8)(__a), imm0_15)) +#define msa_getq_lane_s8(__a, imm0_15) ((int8_t)__builtin_msa_copy_s_b(__a, imm0_15)) +#define msa_getq_lane_u16(__a, imm0_7) ((uint16_t)__builtin_msa_copy_u_h((v8i16)(__a), imm0_7)) +#define msa_getq_lane_s16(__a, imm0_7) ((int16_t)__builtin_msa_copy_s_h(__a, imm0_7)) +#define msa_getq_lane_u32(__a, imm0_3) __builtin_msa_copy_u_w((v4i32)(__a), imm0_3) +#define msa_getq_lane_s32 __builtin_msa_copy_s_w +#define msa_getq_lane_f32(__a, __b) ((float)(__a)[LANE_IMM0_3(__b)]) +#define msa_getq_lane_f64(__a, __b) ((double)(__a)[LANE_IMM0_1(__b)]) +#if (__mips == 64) +#define msa_getq_lane_u64(__a, imm0_1) __builtin_msa_copy_u_d((v2i64)(__a), imm0_1) +#define msa_getq_lane_s64 __builtin_msa_copy_s_d +#else +#define msa_getq_lane_u64(__a, imm0_1) ((uint64_t)(__a)[LANE_IMM0_1(imm0_1)]) +#define msa_getq_lane_s64(__a, imm0_1) ((int64_t)(__a)[LANE_IMM0_1(imm0_1)]) +#endif + +/* combine */ +#if (__mips == 64) +#define __COMBINE_64_64(__TYPE, a, b) ((__TYPE)((v2u64){((v1u64)(a))[0], ((v1u64)(b))[0]})) +#else +#define __COMBINE_64_64(__TYPE, a, b) ((__TYPE)((v4u32){((v2u32)(a))[0], ((v2u32)(a))[1], \ + ((v2u32)(b))[0], ((v2u32)(b))[1]})) +#endif + +/* v16i8 msa_combine_s8 (v8i8 __a, v8i8 __b) */ +#define msa_combine_s8(__a, __b) __COMBINE_64_64(v16i8, __a, __b) + +/* v8i16 msa_combine_s16(v4i16 __a, v4i16 __b) */ +#define msa_combine_s16(__a, __b) __COMBINE_64_64(v8i16, __a, __b) + +/* v4i32 msa_combine_s32(v2i32 __a, v2i32 __b) */ +#define msa_combine_s32(__a, __b) __COMBINE_64_64(v4i32, __a, __b) + +/* v2i64 msa_combine_s64(v1i64 __a, v1i64 __b) */ +#define msa_combine_s64(__a, __b) __COMBINE_64_64(v2i64, __a, __b) + +/* v4f32 msa_combine_f32(v2f32 __a, v2f32 __b) */ +#define msa_combine_f32(__a, __b) __COMBINE_64_64(v4f32, __a, __b) + +/* v16u8 msa_combine_u8(v8u8 __a, v8u8 __b) */ +#define msa_combine_u8(__a, __b) __COMBINE_64_64(v16u8, __a, __b) + +/* v8u16 msa_combine_u16(v4u16 __a, v4u16 __b) */ +#define msa_combine_u16(__a, __b) __COMBINE_64_64(v8u16, __a, __b) + +/* v4u32 msa_combine_u32(v2u32 __a, v2u32 __b) */ +#define msa_combine_u32(__a, __b) __COMBINE_64_64(v4u32, __a, __b) + +/* v2u64 msa_combine_u64(v1u64 __a, v1u64 __b) */ +#define msa_combine_u64(__a, __b) __COMBINE_64_64(v2u64, __a, __b) + +/* v2f64 msa_combine_f64(v1f64 __a, v1f64 __b) */ +#define msa_combine_f64(__a, __b) __COMBINE_64_64(v2f64, __a, __b) + +/* get_low, get_high */ +#if (__mips == 64) +#define __GET_LOW(__TYPE, a) ((__TYPE)((v1u64)(__builtin_msa_copy_u_d((v2i64)(a), 0)))) +#define __GET_HIGH(__TYPE, a) ((__TYPE)((v1u64)(__builtin_msa_copy_u_d((v2i64)(a), 1)))) +#else +#define __GET_LOW(__TYPE, a) ((__TYPE)(((v2u64)(a))[0])) +#define __GET_HIGH(__TYPE, a) ((__TYPE)(((v2u64)(a))[1])) +#endif + +/* v8i8 msa_get_low_s8(v16i8 __a) */ +#define msa_get_low_s8(__a) __GET_LOW(v8i8, __a) + +/* v4i16 msa_get_low_s16(v8i16 __a) */ +#define msa_get_low_s16(__a) __GET_LOW(v4i16, __a) + +/* v2i32 msa_get_low_s32(v4i32 __a) */ +#define msa_get_low_s32(__a) __GET_LOW(v2i32, __a) + +/* v1i64 msa_get_low_s64(v2i64 __a) */ +#define msa_get_low_s64(__a) __GET_LOW(v1i64, __a) + +/* v8u8 msa_get_low_u8(v16u8 __a) */ +#define msa_get_low_u8(__a) __GET_LOW(v8u8, __a) + +/* v4u16 msa_get_low_u16(v8u16 __a) */ +#define msa_get_low_u16(__a) __GET_LOW(v4u16, __a) + +/* v2u32 msa_get_low_u32(v4u32 __a) */ +#define msa_get_low_u32(__a) __GET_LOW(v2u32, __a) + +/* v1u64 msa_get_low_u64(v2u64 __a) */ +#define msa_get_low_u64(__a) __GET_LOW(v1u64, __a) + +/* v2f32 msa_get_low_f32(v4f32 __a) */ +#define msa_get_low_f32(__a) __GET_LOW(v2f32, __a) + +/* v1f64 msa_get_low_f64(v2f64 __a) */ +#define msa_get_low_f64(__a) __GET_LOW(v1f64, __a) + +/* v8i8 msa_get_high_s8(v16i8 __a) */ +#define msa_get_high_s8(__a) __GET_HIGH(v8i8, __a) + +/* v4i16 msa_get_high_s16(v8i16 __a) */ +#define msa_get_high_s16(__a) __GET_HIGH(v4i16, __a) + +/* v2i32 msa_get_high_s32(v4i32 __a) */ +#define msa_get_high_s32(__a) __GET_HIGH(v2i32, __a) + +/* v1i64 msa_get_high_s64(v2i64 __a) */ +#define msa_get_high_s64(__a) __GET_HIGH(v1i64, __a) + +/* v8u8 msa_get_high_u8(v16u8 __a) */ +#define msa_get_high_u8(__a) __GET_HIGH(v8u8, __a) + +/* v4u16 msa_get_high_u16(v8u16 __a) */ +#define msa_get_high_u16(__a) __GET_HIGH(v4u16, __a) + +/* v2u32 msa_get_high_u32(v4u32 __a) */ +#define msa_get_high_u32(__a) __GET_HIGH(v2u32, __a) + +/* v1u64 msa_get_high_u64(v2u64 __a) */ +#define msa_get_high_u64(__a) __GET_HIGH(v1u64, __a) + +/* v2f32 msa_get_high_f32(v4f32 __a) */ +#define msa_get_high_f32(__a) __GET_HIGH(v2f32, __a) + +/* v1f64 msa_get_high_f64(v2f64 __a) */ +#define msa_get_high_f64(__a) __GET_HIGH(v1f64, __a) + +/* ri = ai * b[lane] */ +/* v4f32 msa_mulq_lane_f32(v4f32 __a, v4f32 __b, const int __lane) */ +#define msa_mulq_lane_f32(__a, __b, __lane) ((__a) * msa_getq_lane_f32(__b, __lane)) + +/* ri = ai + bi * c[lane] */ +/* v4f32 msa_mlaq_lane_f32(v4f32 __a, v4f32 __b, v4f32 __c, const int __lane) */ +#define msa_mlaq_lane_f32(__a, __b, __c, __lane) ((__a) + ((__b) * msa_getq_lane_f32(__c, __lane))) + +/* uint16_t msa_sum_u16(v8u16 __a)*/ +#define msa_sum_u16(__a) \ +({ \ + v4u32 _b; \ + v2u64 _c; \ + _b = __builtin_msa_hadd_u_w(__a, __a); \ + _c = __builtin_msa_hadd_u_d(_b, _b); \ + (uint16_t)(_c[0] + _c[1]); \ +}) + +/* int16_t msa_sum_s16(v8i16 __a) */ +#define msa_sum_s16(__a) \ +({ \ + v4i32 _b; \ + v2i64 _c; \ + _b = __builtin_msa_hadd_s_w(__a, __a); \ + _c = __builtin_msa_hadd_s_d(_b, _b); \ + (int16_t)(_c[0] + _c[1]); \ +}) + + +/* uint32_t msa_sum_u32(v4u32 __a)*/ +#define msa_sum_u32(__a) \ +({ \ + v2u64 _b; \ + _b = __builtin_msa_hadd_u_d(__a, __a); \ + (uint32_t)(_b[0] + _b[1]); \ +}) + +/* int32_t msa_sum_s32(v4i32 __a)*/ +#define msa_sum_s32(__a) \ +({ \ + v2i64 _b; \ + _b = __builtin_msa_hadd_s_d(__a, __a); \ + (int32_t)(_b[0] + _b[1]); \ +}) + +/* uint8_t msa_sum_u8(v16u8 __a)*/ +#define msa_sum_u8(__a) \ +({ \ + v8u16 _b16; \ + v4u32 _c32; \ + _b16 = __builtin_msa_hadd_u_h(__a, __a); \ + _c32 = __builtin_msa_hadd_u_w(_b16, _b16); \ + (uint8_t)msa_sum_u32(_c32); \ +}) + +/* int8_t msa_sum_s8(v16s8 __a)*/ +#define msa_sum_s8(__a) \ +({ \ + v8i16 _b16; \ + v4i32 _c32; \ + _b16 = __builtin_msa_hadd_s_h(__a, __a); \ + _c32 = __builtin_msa_hadd_s_w(_b16, _b16); \ + (int8_t)msa_sum_s32(_c32); \ +}) + +/* float msa_sum_f32(v4f32 __a)*/ +#define msa_sum_f32(__a) ((__a)[0] + (__a)[1] + (__a)[2] + (__a)[3]) + +/* v8u16 msa_paddlq_u8(v16u8 __a) */ +#define msa_paddlq_u8(__a) (__builtin_msa_hadd_u_h(__a, __a)) + +/* v8i16 msa_paddlq_s8(v16i8 __a) */ +#define msa_paddlq_s8(__a) (__builtin_msa_hadd_s_h(__a, __a)) + +/* v4u32 msa_paddlq_u16 (v8u16 __a)*/ +#define msa_paddlq_u16(__a) (__builtin_msa_hadd_u_w(__a, __a)) + +/* v4i32 msa_paddlq_s16 (v8i16 __a)*/ +#define msa_paddlq_s16(__a) (__builtin_msa_hadd_s_w(__a, __a)) + +/* v2u64 msa_paddlq_u32(v4u32 __a) */ +#define msa_paddlq_u32(__a) (__builtin_msa_hadd_u_d(__a, __a)) + +/* v2i64 msa_paddlq_s32(v4i32 __a) */ +#define msa_paddlq_s32(__a) (__builtin_msa_hadd_s_d(__a, __a)) + +#define V8U8_2_V8U16(x) {(uint16_t)x[0], (uint16_t)x[1], (uint16_t)x[2], (uint16_t)x[3], \ + (uint16_t)x[4], (uint16_t)x[5], (uint16_t)x[6], (uint16_t)x[7]} +#define V8U8_2_V8I16(x) {(int16_t)x[0], (int16_t)x[1], (int16_t)x[2], (int16_t)x[3], \ + (int16_t)x[4], (int16_t)x[5], (int16_t)x[6], (int16_t)x[7]} +#define V8I8_2_V8I16(x) {(int16_t)x[0], (int16_t)x[1], (int16_t)x[2], (int16_t)x[3], \ + (int16_t)x[4], (int16_t)x[5], (int16_t)x[6], (int16_t)x[7]} +#define V4U16_2_V4U32(x) {(uint32_t)x[0], (uint32_t)x[1], (uint32_t)x[2], (uint32_t)x[3]} +#define V4U16_2_V4I32(x) {(int32_t)x[0], (int32_t)x[1], (int32_t)x[2], (int32_t)x[3]} +#define V4I16_2_V4I32(x) {(int32_t)x[0], (int32_t)x[1], (int32_t)x[2], (int32_t)x[3]} +#define V2U32_2_V2U64(x) {(uint64_t)x[0], (uint64_t)x[1]} +#define V2U32_2_V2I64(x) {(int64_t)x[0], (int64_t)x[1]} + +/* v8u16 msa_mull_u8(v8u8 __a, v8u8 __b) */ +#define msa_mull_u8(__a, __b) ((v8u16)__builtin_msa_mulv_h((v8i16)V8U8_2_V8I16(__a), (v8i16)V8U8_2_V8I16(__b))) + +/* v8i16 msa_mull_s8(v8i8 __a, v8i8 __b)*/ +#define msa_mull_s8(__a, __b) (__builtin_msa_mulv_h((v8i16)V8I8_2_V8I16(__a), (v8i16)V8I8_2_V8I16(__b))) + +/* v4u32 msa_mull_u16(v4u16 __a, v4u16 __b) */ +#define msa_mull_u16(__a, __b) ((v4u32)__builtin_msa_mulv_w((v4i32)V4U16_2_V4I32(__a), (v4i32)V4U16_2_V4I32(__b))) + +/* v4i32 msa_mull_s16(v4i16 __a, v4i16 __b) */ +#define msa_mull_s16(__a, __b) (__builtin_msa_mulv_w((v4i32)V4I16_2_V4I32(__a), (v4i32)V4I16_2_V4I32(__b))) + +/* v2u64 msa_mull_u32(v2u32 __a, v2u32 __b) */ +#define msa_mull_u32(__a, __b) ((v2u64)__builtin_msa_mulv_d((v2i64)V2U32_2_V2I64(__a), (v2i64)V2U32_2_V2I64(__b))) + +/* bitwise and: __builtin_msa_and_v */ +#define msa_andq_u8(__a, __b) ((v16u8)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b))) +#define msa_andq_s8(__a, __b) ((v16i8)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b))) +#define msa_andq_u16(__a, __b) ((v8u16)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b))) +#define msa_andq_s16(__a, __b) ((v8i16)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b))) +#define msa_andq_u32(__a, __b) ((v4u32)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b))) +#define msa_andq_s32(__a, __b) ((v4i32)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b))) +#define msa_andq_u64(__a, __b) ((v2u64)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b))) +#define msa_andq_s64(__a, __b) ((v2i64)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b))) + +/* bitwise or: __builtin_msa_or_v */ +#define msa_orrq_u8(__a, __b) ((v16u8)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b))) +#define msa_orrq_s8(__a, __b) ((v16i8)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b))) +#define msa_orrq_u16(__a, __b) ((v8u16)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b))) +#define msa_orrq_s16(__a, __b) ((v8i16)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b))) +#define msa_orrq_u32(__a, __b) ((v4u32)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b))) +#define msa_orrq_s32(__a, __b) ((v4i32)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b))) +#define msa_orrq_u64(__a, __b) ((v2u64)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b))) +#define msa_orrq_s64(__a, __b) ((v2i64)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b))) + +/* bitwise xor: __builtin_msa_xor_v */ +#define msa_eorq_u8(__a, __b) ((v16u8)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b))) +#define msa_eorq_s8(__a, __b) ((v16i8)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b))) +#define msa_eorq_u16(__a, __b) ((v8u16)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b))) +#define msa_eorq_s16(__a, __b) ((v8i16)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b))) +#define msa_eorq_u32(__a, __b) ((v4u32)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b))) +#define msa_eorq_s32(__a, __b) ((v4i32)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b))) +#define msa_eorq_u64(__a, __b) ((v2u64)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b))) +#define msa_eorq_s64(__a, __b) ((v2i64)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b))) + +/* bitwise not: v16u8 __builtin_msa_xori_b (v16u8, 0xff) */ +#define msa_mvnq_u8(__a) ((v16u8)__builtin_msa_xori_b((v16u8)(__a), 0xFF)) +#define msa_mvnq_s8(__a) ((v16i8)__builtin_msa_xori_b((v16u8)(__a), 0xFF)) +#define msa_mvnq_u16(__a) ((v8u16)__builtin_msa_xori_b((v16u8)(__a), 0xFF)) +#define msa_mvnq_s16(__a) ((v8i16)__builtin_msa_xori_b((v16u8)(__a), 0xFF)) +#define msa_mvnq_u32(__a) ((v4u32)__builtin_msa_xori_b((v16u8)(__a), 0xFF)) +#define msa_mvnq_s32(__a) ((v4i32)__builtin_msa_xori_b((v16u8)(__a), 0xFF)) +#define msa_mvnq_u64(__a) ((v2u64)__builtin_msa_xori_b((v16u8)(__a), 0xFF)) +#define msa_mvnq_s64(__a) ((v2i64)__builtin_msa_xori_b((v16u8)(__a), 0xFF)) + +/* compare equal: ceq -> ri = ai == bi ? 1...1:0...0 */ +#define msa_ceqq_u8(__a, __b) ((v16u8)__builtin_msa_ceq_b((v16i8)(__a), (v16i8)(__b))) +#define msa_ceqq_s8(__a, __b) ((v16u8)__builtin_msa_ceq_b((v16i8)(__a), (v16i8)(__b))) +#define msa_ceqq_u16(__a, __b) ((v8u16)__builtin_msa_ceq_h((v8i16)(__a), (v8i16)(__b))) +#define msa_ceqq_s16(__a, __b) ((v8u16)__builtin_msa_ceq_h((v8i16)(__a), (v8i16)(__b))) +#define msa_ceqq_u32(__a, __b) ((v4u32)__builtin_msa_ceq_w((v4i32)(__a), (v4i32)(__b))) +#define msa_ceqq_s32(__a, __b) ((v4u32)__builtin_msa_ceq_w((v4i32)(__a), (v4i32)(__b))) +#define msa_ceqq_f32(__a, __b) ((v4u32)__builtin_msa_fceq_w((v4f32)(__a), (v4f32)(__b))) +#define msa_ceqq_u64(__a, __b) ((v2u64)__builtin_msa_ceq_d((v2i64)(__a), (v2i64)(__b))) +#define msa_ceqq_s64(__a, __b) ((v2u64)__builtin_msa_ceq_d((v2i64)(__a), (v2i64)(__b))) +#define msa_ceqq_f64(__a, __b) ((v2u64)__builtin_msa_fceq_d((v2f64)(__a), (v2f64)(__b))) + +/* Compare less-than: clt -> ri = ai < bi ? 1...1:0...0 */ +#define msa_cltq_u8(__a, __b) ((v16u8)__builtin_msa_clt_u_b((v16u8)(__a), (v16u8)(__b))) +#define msa_cltq_s8(__a, __b) ((v16u8)__builtin_msa_clt_s_b((v16i8)(__a), (v16i8)(__b))) +#define msa_cltq_u16(__a, __b) ((v8u16)__builtin_msa_clt_u_h((v8u16)(__a), (v8u16)(__b))) +#define msa_cltq_s16(__a, __b) ((v8u16)__builtin_msa_clt_s_h((v8i16)(__a), (v8i16)(__b))) +#define msa_cltq_u32(__a, __b) ((v4u32)__builtin_msa_clt_u_w((v4u32)(__a), (v4u32)(__b))) +#define msa_cltq_s32(__a, __b) ((v4u32)__builtin_msa_clt_s_w((v4i32)(__a), (v4i32)(__b))) +#define msa_cltq_f32(__a, __b) ((v4u32)__builtin_msa_fclt_w((v4f32)(__a), (v4f32)(__b))) +#define msa_cltq_u64(__a, __b) ((v2u64)__builtin_msa_clt_u_d((v2u64)(__a), (v2u64)(__b))) +#define msa_cltq_s64(__a, __b) ((v2u64)__builtin_msa_clt_s_d((v2i64)(__a), (v2i64)(__b))) +#define msa_cltq_f64(__a, __b) ((v2u64)__builtin_msa_fclt_d((v2f64)(__a), (v2f64)(__b))) + +/* compare greater-than: cgt -> ri = ai > bi ? 1...1:0...0 */ +#define msa_cgtq_u8(__a, __b) ((v16u8)__builtin_msa_clt_u_b((v16u8)(__b), (v16u8)(__a))) +#define msa_cgtq_s8(__a, __b) ((v16u8)__builtin_msa_clt_s_b((v16i8)(__b), (v16i8)(__a))) +#define msa_cgtq_u16(__a, __b) ((v8u16)__builtin_msa_clt_u_h((v8u16)(__b), (v8u16)(__a))) +#define msa_cgtq_s16(__a, __b) ((v8u16)__builtin_msa_clt_s_h((v8i16)(__b), (v8i16)(__a))) +#define msa_cgtq_u32(__a, __b) ((v4u32)__builtin_msa_clt_u_w((v4u32)(__b), (v4u32)(__a))) +#define msa_cgtq_s32(__a, __b) ((v4u32)__builtin_msa_clt_s_w((v4i32)(__b), (v4i32)(__a))) +#define msa_cgtq_f32(__a, __b) ((v4u32)__builtin_msa_fclt_w((v4f32)(__b), (v4f32)(__a))) +#define msa_cgtq_u64(__a, __b) ((v2u64)__builtin_msa_clt_u_d((v2u64)(__b), (v2u64)(__a))) +#define msa_cgtq_s64(__a, __b) ((v2u64)__builtin_msa_clt_s_d((v2i64)(__b), (v2i64)(__a))) +#define msa_cgtq_f64(__a, __b) ((v2u64)__builtin_msa_fclt_d((v2f64)(__b), (v2f64)(__a))) + +/* compare less-equal: cle -> ri = ai <= bi ? 1...1:0...0 */ +#define msa_cleq_u8(__a, __b) ((v16u8)__builtin_msa_cle_u_b((v16u8)(__a), (v16u8)(__b))) +#define msa_cleq_s8(__a, __b) ((v16u8)__builtin_msa_cle_s_b((v16i8)(__a), (v16i8)(__b))) +#define msa_cleq_u16(__a, __b) ((v8u16)__builtin_msa_cle_u_h((v8u16)(__a), (v8u16)(__b))) +#define msa_cleq_s16(__a, __b) ((v8u16)__builtin_msa_cle_s_h((v8i16)(__a), (v8i16)(__b))) +#define msa_cleq_u32(__a, __b) ((v4u32)__builtin_msa_cle_u_w((v4u32)(__a), (v4u32)(__b))) +#define msa_cleq_s32(__a, __b) ((v4u32)__builtin_msa_cle_s_w((v4i32)(__a), (v4i32)(__b))) +#define msa_cleq_f32(__a, __b) ((v4u32)__builtin_msa_fcle_w((v4f32)(__a), (v4f32)(__b))) +#define msa_cleq_u64(__a, __b) ((v2u64)__builtin_msa_cle_u_d((v2u64)(__a), (v2u64)(__b))) +#define msa_cleq_s64(__a, __b) ((v2u64)__builtin_msa_cle_s_d((v2i64)(__a), (v2i64)(__b))) +#define msa_cleq_f64(__a, __b) ((v2u64)__builtin_msa_fcle_d((v2f64)(__a), (v2f64)(__b))) + +/* compare greater-equal: cge -> ri = ai >= bi ? 1...1:0...0 */ +#define msa_cgeq_u8(__a, __b) ((v16u8)__builtin_msa_cle_u_b((v16u8)(__b), (v16u8)(__a))) +#define msa_cgeq_s8(__a, __b) ((v16u8)__builtin_msa_cle_s_b((v16i8)(__b), (v16i8)(__a))) +#define msa_cgeq_u16(__a, __b) ((v8u16)__builtin_msa_cle_u_h((v8u16)(__b), (v8u16)(__a))) +#define msa_cgeq_s16(__a, __b) ((v8u16)__builtin_msa_cle_s_h((v8i16)(__b), (v8i16)(__a))) +#define msa_cgeq_u32(__a, __b) ((v4u32)__builtin_msa_cle_u_w((v4u32)(__b), (v4u32)(__a))) +#define msa_cgeq_s32(__a, __b) ((v4u32)__builtin_msa_cle_s_w((v4i32)(__b), (v4i32)(__a))) +#define msa_cgeq_f32(__a, __b) ((v4u32)__builtin_msa_fcle_w((v4f32)(__b), (v4f32)(__a))) +#define msa_cgeq_u64(__a, __b) ((v2u64)__builtin_msa_cle_u_d((v2u64)(__b), (v2u64)(__a))) +#define msa_cgeq_s64(__a, __b) ((v2u64)__builtin_msa_cle_s_d((v2i64)(__b), (v2i64)(__a))) +#define msa_cgeq_f64(__a, __b) ((v2u64)__builtin_msa_fcle_d((v2f64)(__b), (v2f64)(__a))) + +/* Shift Left Logical: shl -> ri = ai << bi; */ +#define msa_shlq_u8(__a, __b) ((v16u8)__builtin_msa_sll_b((v16i8)(__a), (v16i8)(__b))) +#define msa_shlq_s8(__a, __b) ((v16i8)__builtin_msa_sll_b((v16i8)(__a), (v16i8)(__b))) +#define msa_shlq_u16(__a, __b) ((v8u16)__builtin_msa_sll_h((v8i16)(__a), (v8i16)(__b))) +#define msa_shlq_s16(__a, __b) ((v8i16)__builtin_msa_sll_h((v8i16)(__a), (v8i16)(__b))) +#define msa_shlq_u32(__a, __b) ((v4u32)__builtin_msa_sll_w((v4i32)(__a), (v4i32)(__b))) +#define msa_shlq_s32(__a, __b) ((v4i32)__builtin_msa_sll_w((v4i32)(__a), (v4i32)(__b))) +#define msa_shlq_u64(__a, __b) ((v2u64)__builtin_msa_sll_d((v2i64)(__a), (v2i64)(__b))) +#define msa_shlq_s64(__a, __b) ((v2i64)__builtin_msa_sll_d((v2i64)(__a), (v2i64)(__b))) + +/* Immediate Shift Left Logical: shl -> ri = ai << imm; */ +#define msa_shlq_n_u8(__a, __imm) ((v16u8)__builtin_msa_slli_b((v16i8)(__a), __imm)) +#define msa_shlq_n_s8(__a, __imm) ((v16i8)__builtin_msa_slli_b((v16i8)(__a), __imm)) +#define msa_shlq_n_u16(__a, __imm) ((v8u16)__builtin_msa_slli_h((v8i16)(__a), __imm)) +#define msa_shlq_n_s16(__a, __imm) ((v8i16)__builtin_msa_slli_h((v8i16)(__a), __imm)) +#define msa_shlq_n_u32(__a, __imm) ((v4u32)__builtin_msa_slli_w((v4i32)(__a), __imm)) +#define msa_shlq_n_s32(__a, __imm) ((v4i32)__builtin_msa_slli_w((v4i32)(__a), __imm)) +#define msa_shlq_n_u64(__a, __imm) ((v2u64)__builtin_msa_slli_d((v2i64)(__a), __imm)) +#define msa_shlq_n_s64(__a, __imm) ((v2i64)__builtin_msa_slli_d((v2i64)(__a), __imm)) + +/* shift right: shrq -> ri = ai >> bi; */ +#define msa_shrq_u8(__a, __b) ((v16u8)__builtin_msa_srl_b((v16i8)(__a), (v16i8)(__b))) +#define msa_shrq_s8(__a, __b) ((v16i8)__builtin_msa_sra_b((v16i8)(__a), (v16i8)(__b))) +#define msa_shrq_u16(__a, __b) ((v8u16)__builtin_msa_srl_h((v8i16)(__a), (v8i16)(__b))) +#define msa_shrq_s16(__a, __b) ((v8i16)__builtin_msa_sra_h((v8i16)(__a), (v8i16)(__b))) +#define msa_shrq_u32(__a, __b) ((v4u32)__builtin_msa_srl_w((v4i32)(__a), (v4i32)(__b))) +#define msa_shrq_s32(__a, __b) ((v4i32)__builtin_msa_sra_w((v4i32)(__a), (v4i32)(__b))) +#define msa_shrq_u64(__a, __b) ((v2u64)__builtin_msa_srl_d((v2i64)(__a), (v2i64)(__b))) +#define msa_shrq_s64(__a, __b) ((v2i64)__builtin_msa_sra_d((v2i64)(__a), (v2i64)(__b))) + +/* Immediate Shift Right: shr -> ri = ai >> imm; */ +#define msa_shrq_n_u8(__a, __imm) ((v16u8)__builtin_msa_srli_b((v16i8)(__a), __imm)) +#define msa_shrq_n_s8(__a, __imm) ((v16i8)__builtin_msa_srai_b((v16i8)(__a), __imm)) +#define msa_shrq_n_u16(__a, __imm) ((v8u16)__builtin_msa_srli_h((v8i16)(__a), __imm)) +#define msa_shrq_n_s16(__a, __imm) ((v8i16)__builtin_msa_srai_h((v8i16)(__a), __imm)) +#define msa_shrq_n_u32(__a, __imm) ((v4u32)__builtin_msa_srli_w((v4i32)(__a), __imm)) +#define msa_shrq_n_s32(__a, __imm) ((v4i32)__builtin_msa_srai_w((v4i32)(__a), __imm)) +#define msa_shrq_n_u64(__a, __imm) ((v2u64)__builtin_msa_srli_d((v2i64)(__a), __imm)) +#define msa_shrq_n_s64(__a, __imm) ((v2i64)__builtin_msa_srai_d((v2i64)(__a), __imm)) + +/* Immediate Shift Right Rounded: shr -> ri = ai >> (rounded)imm; */ +#define msa_rshrq_n_u8(__a, __imm) ((v16u8)__builtin_msa_srlri_b((v16i8)(__a), __imm)) +#define msa_rshrq_n_s8(__a, __imm) ((v16i8)__builtin_msa_srari_b((v16i8)(__a), __imm)) +#define msa_rshrq_n_u16(__a, __imm) ((v8u16)__builtin_msa_srlri_h((v8i16)(__a), __imm)) +#define msa_rshrq_n_s16(__a, __imm) ((v8i16)__builtin_msa_srari_h((v8i16)(__a), __imm)) +#define msa_rshrq_n_u32(__a, __imm) ((v4u32)__builtin_msa_srlri_w((v4i32)(__a), __imm)) +#define msa_rshrq_n_s32(__a, __imm) ((v4i32)__builtin_msa_srari_w((v4i32)(__a), __imm)) +#define msa_rshrq_n_u64(__a, __imm) ((v2u64)__builtin_msa_srlri_d((v2i64)(__a), __imm)) +#define msa_rshrq_n_s64(__a, __imm) ((v2i64)__builtin_msa_srari_d((v2i64)(__a), __imm)) + +/* Vector saturating rounding shift left, qrshl -> ri = ai << bi; */ +#define msa_qrshrq_s32(a, b) ((v4i32)__msa_srar_w((v4i32)(a), (v4i32)(b))) + +/* Rename the msa builtin func to unify the name style for intrin_msa.hpp */ +#define msa_qaddq_u8 __builtin_msa_adds_u_b +#define msa_qaddq_s8 __builtin_msa_adds_s_b +#define msa_qaddq_u16 __builtin_msa_adds_u_h +#define msa_qaddq_s16 __builtin_msa_adds_s_h +#define msa_qaddq_u32 __builtin_msa_adds_u_w +#define msa_qaddq_s32 __builtin_msa_adds_s_w +#define msa_qaddq_u64 __builtin_msa_adds_u_d +#define msa_qaddq_s64 __builtin_msa_adds_s_d +#define msa_addq_u8(a, b) ((v16u8)__builtin_msa_addv_b((v16i8)(a), (v16i8)(b))) +#define msa_addq_s8 __builtin_msa_addv_b +#define msa_addq_u16(a, b) ((v8u16)__builtin_msa_addv_h((v8i16)(a), (v8i16)(b))) +#define msa_addq_s16 __builtin_msa_addv_h +#define msa_addq_u32(a, b) ((v4u32)__builtin_msa_addv_w((v4i32)(a), (v4i32)(b))) +#define msa_addq_s32 __builtin_msa_addv_w +#define msa_addq_f32 __builtin_msa_fadd_w +#define msa_addq_u64(a, b) ((v2u64)__builtin_msa_addv_d((v2i64)(a), (v2i64)(b))) +#define msa_addq_s64 __builtin_msa_addv_d +#define msa_addq_f64 __builtin_msa_fadd_d +#define msa_qsubq_u8 __builtin_msa_subs_u_b +#define msa_qsubq_s8 __builtin_msa_subs_s_b +#define msa_qsubq_u16 __builtin_msa_subs_u_h +#define msa_qsubq_s16 __builtin_msa_subs_s_h +#define msa_subq_u8(a, b) ((v16u8)__builtin_msa_subv_b((v16i8)(a), (v16i8)(b))) +#define msa_subq_s8 __builtin_msa_subv_b +#define msa_subq_u16(a, b) ((v8u16)__builtin_msa_subv_h((v8i16)(a), (v8i16)(b))) +#define msa_subq_s16 __builtin_msa_subv_h +#define msa_subq_u32(a, b) ((v4u32)__builtin_msa_subv_w((v4i32)(a), (v4i32)(b))) +#define msa_subq_s32 __builtin_msa_subv_w +#define msa_subq_f32 __builtin_msa_fsub_w +#define msa_subq_u64(a, b) ((v2u64)__builtin_msa_subv_d((v2i64)(a), (v2i64)(b))) +#define msa_subq_s64 __builtin_msa_subv_d +#define msa_subq_f64 __builtin_msa_fsub_d +#define msa_mulq_u8(a, b) ((v16u8)__builtin_msa_mulv_b((v16i8)(a), (v16i8)(b))) +#define msa_mulq_s8(a, b) ((v16i8)__builtin_msa_mulv_b((v16i8)(a), (v16i8)(b))) +#define msa_mulq_u16(a, b) ((v8u16)__builtin_msa_mulv_h((v8i16)(a), (v8i16)(b))) +#define msa_mulq_s16(a, b) ((v8i16)__builtin_msa_mulv_h((v8i16)(a), (v8i16)(b))) +#define msa_mulq_u32(a, b) ((v4u32)__builtin_msa_mulv_w((v4i32)(a), (v4i32)(b))) +#define msa_mulq_s32(a, b) ((v4i32)__builtin_msa_mulv_w((v4i32)(a), (v4i32)(b))) +#define msa_mulq_u64(a, b) ((v2u64)__builtin_msa_mulv_d((v2i64)(a), (v2i64)(b))) +#define msa_mulq_s64(a, b) ((v2i64)__builtin_msa_mulv_d((v2i64)(a), (v2i64)(b))) +#define msa_mulq_f32 __builtin_msa_fmul_w +#define msa_mulq_f64 __builtin_msa_fmul_d +#define msa_divq_f32 __builtin_msa_fdiv_w +#define msa_divq_f64 __builtin_msa_fdiv_d +#define msa_dotp_s_h __builtin_msa_dotp_s_h +#define msa_dotp_s_w __builtin_msa_dotp_s_w +#define msa_dotp_s_d __builtin_msa_dotp_s_d +#define msa_dotp_u_h __builtin_msa_dotp_u_h +#define msa_dotp_u_w __builtin_msa_dotp_u_w +#define msa_dotp_u_d __builtin_msa_dotp_u_d +#define msa_dpadd_s_h __builtin_msa_dpadd_s_h +#define msa_dpadd_s_w __builtin_msa_dpadd_s_w +#define msa_dpadd_s_d __builtin_msa_dpadd_s_d +#define msa_dpadd_u_h __builtin_msa_dpadd_u_h +#define msa_dpadd_u_w __builtin_msa_dpadd_u_w +#define msa_dpadd_u_d __builtin_msa_dpadd_u_d + +#define ILVRL_B2(RTYPE, in0, in1, low, hi) do { \ + low = (RTYPE)__builtin_msa_ilvr_b((v16i8)(in0), (v16i8)(in1)); \ + hi = (RTYPE)__builtin_msa_ilvl_b((v16i8)(in0), (v16i8)(in1)); \ + } while (0) +#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__) +#define ILVRL_B2_SB(...) ILVRL_B2(v16i8, __VA_ARGS__) +#define ILVRL_B2_UH(...) ILVRL_B2(v8u16, __VA_ARGS__) +#define ILVRL_B2_SH(...) ILVRL_B2(v8i16, __VA_ARGS__) +#define ILVRL_B2_SW(...) ILVRL_B2(v4i32, __VA_ARGS__) + +#define ILVRL_H2(RTYPE, in0, in1, low, hi) do { \ + low = (RTYPE)__builtin_msa_ilvr_h((v8i16)(in0), (v8i16)(in1)); \ + hi = (RTYPE)__builtin_msa_ilvl_h((v8i16)(in0), (v8i16)(in1)); \ + } while (0) +#define ILVRL_H2_UB(...) ILVRL_H2(v16u8, __VA_ARGS__) +#define ILVRL_H2_SB(...) ILVRL_H2(v16i8, __VA_ARGS__) +#define ILVRL_H2_UH(...) ILVRL_H2(v8u16, __VA_ARGS__) +#define ILVRL_H2_SH(...) ILVRL_H2(v8i16, __VA_ARGS__) +#define ILVRL_H2_SW(...) ILVRL_H2(v4i32, __VA_ARGS__) +#define ILVRL_H2_UW(...) ILVRL_H2(v4u32, __VA_ARGS__) + +#define ILVRL_W2(RTYPE, in0, in1, low, hi) do { \ + low = (RTYPE)__builtin_msa_ilvr_w((v4i32)(in0), (v4i32)(in1)); \ + hi = (RTYPE)__builtin_msa_ilvl_w((v4i32)(in0), (v4i32)(in1)); \ + } while (0) +#define ILVRL_W2_UB(...) ILVRL_W2(v16u8, __VA_ARGS__) +#define ILVRL_W2_SH(...) ILVRL_W2(v8i16, __VA_ARGS__) +#define ILVRL_W2_SW(...) ILVRL_W2(v4i32, __VA_ARGS__) +#define ILVRL_W2_UW(...) ILVRL_W2(v4u32, __VA_ARGS__) + +/* absq, qabsq (r = |a|;) */ +#define msa_absq_s8(a) __builtin_msa_add_a_b(a, __builtin_msa_fill_b(0)) +#define msa_absq_s16(a) __builtin_msa_add_a_h(a, __builtin_msa_fill_h(0)) +#define msa_absq_s32(a) __builtin_msa_add_a_w(a, __builtin_msa_fill_w(0)) +#define msa_absq_s64(a) __builtin_msa_add_a_d(a, __builtin_msa_fill_d(0)) +#define msa_absq_f32(a) ((v4f32)__builtin_msa_bclri_w((v4u32)(a), 31)) +#define msa_absq_f64(a) ((v2f64)__builtin_msa_bclri_d((v2u64)(a), 63)) +#define msa_qabsq_s8(a) __builtin_msa_adds_a_b(a, __builtin_msa_fill_b(0)) +#define msa_qabsq_s16(a) __builtin_msa_adds_a_h(a, __builtin_msa_fill_h(0)) +#define msa_qabsq_s32(a) __builtin_msa_adds_a_w(a, __builtin_msa_fill_w(0)) +#define msa_qabsq_s64(a) __builtin_msa_adds_a_d(a, __builtin_msa_fill_d(0)) + +/* abdq, qabdq (r = |a - b|;) */ +#define msa_abdq_u8 __builtin_msa_asub_u_b +#define msa_abdq_s8 __builtin_msa_asub_s_b +#define msa_abdq_u16 __builtin_msa_asub_u_h +#define msa_abdq_s16 __builtin_msa_asub_s_h +#define msa_abdq_u32 __builtin_msa_asub_u_w +#define msa_abdq_s32 __builtin_msa_asub_s_w +#define msa_abdq_u64 __builtin_msa_asub_u_d +#define msa_abdq_s64 __builtin_msa_asub_s_d +#define msa_abdq_f32(a, b) msa_absq_f32(__builtin_msa_fsub_w(a, b)) +#define msa_abdq_f64(a, b) msa_absq_f64(__builtin_msa_fsub_d(a, b)) +#define msa_qabdq_s8(a, b) msa_qabsq_s8(__builtin_msa_subs_s_b(a, b)) +#define msa_qabdq_s16(a, b) msa_qabsq_s16(__builtin_msa_subs_s_h(a, b)) +#define msa_qabdq_s32(a, b) msa_qabsq_s32(__builtin_msa_subs_s_w(a, b)) +#define msa_qabdq_s64(a, b) msa_qabsq_s64(__builtin_msa_subs_s_d(a, b)) + +/* sqrtq, rsqrtq */ +#define msa_sqrtq_f32 __builtin_msa_fsqrt_w +#define msa_sqrtq_f64 __builtin_msa_fsqrt_d +#define msa_rsqrtq_f32 __builtin_msa_frsqrt_w +#define msa_rsqrtq_f64 __builtin_msa_frsqrt_d + + +/* mlaq: r = a + b * c; */ +__extension__ extern __inline v4i32 +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +msa_mlaq_s32(v4i32 __a, v4i32 __b, v4i32 __c) +{ + __asm__ volatile("maddv.w %w[__a], %w[__b], %w[__c]\n" + // Outputs + : [__a] "+f"(__a) + // Inputs + : [__b] "f"(__b), [__c] "f"(__c)); + return __a; +} + +__extension__ extern __inline v2i64 +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +msa_mlaq_s64(v2i64 __a, v2i64 __b, v2i64 __c) +{ + __asm__ volatile("maddv.d %w[__a], %w[__b], %w[__c]\n" + // Outputs + : [__a] "+f"(__a) + // Inputs + : [__b] "f"(__b), [__c] "f"(__c)); + return __a; +} + +__extension__ extern __inline v4f32 +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +msa_mlaq_f32(v4f32 __a, v4f32 __b, v4f32 __c) +{ + __asm__ volatile("fmadd.w %w[__a], %w[__b], %w[__c]\n" + // Outputs + : [__a] "+f"(__a) + // Inputs + : [__b] "f"(__b), [__c] "f"(__c)); + return __a; +} + +__extension__ extern __inline v2f64 +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +msa_mlaq_f64(v2f64 __a, v2f64 __b, v2f64 __c) +{ + __asm__ volatile("fmadd.d %w[__a], %w[__b], %w[__c]\n" + // Outputs + : [__a] "+f"(__a) + // Inputs + : [__b] "f"(__b), [__c] "f"(__c)); + return __a; +} + +/* cntq */ +#define msa_cntq_s8 __builtin_msa_pcnt_b +#define msa_cntq_s16 __builtin_msa_pcnt_h +#define msa_cntq_s32 __builtin_msa_pcnt_w +#define msa_cntq_s64 __builtin_msa_pcnt_d + +/* bslq (a: mask; r = b(if a == 0); r = c(if a == 1);) */ +#define msa_bslq_u8 __builtin_msa_bsel_v + +/* ilvrq, ilvlq (For EL only, ilvrq: b0, a0, b1, a1; ilvlq: b2, a2, b3, a3;) */ +#define msa_ilvrq_s8 __builtin_msa_ilvr_b +#define msa_ilvrq_s16 __builtin_msa_ilvr_h +#define msa_ilvrq_s32 __builtin_msa_ilvr_w +#define msa_ilvrq_s64 __builtin_msa_ilvr_d +#define msa_ilvlq_s8 __builtin_msa_ilvl_b +#define msa_ilvlq_s16 __builtin_msa_ilvl_h +#define msa_ilvlq_s32 __builtin_msa_ilvl_w +#define msa_ilvlq_s64 __builtin_msa_ilvl_d + +/* ilvevq, ilvodq (ilvevq: b0, a0, b2, a2; ilvodq: b1, a1, b3, a3; ) */ +#define msa_ilvevq_s8 __builtin_msa_ilvev_b +#define msa_ilvevq_s16 __builtin_msa_ilvev_h +#define msa_ilvevq_s32 __builtin_msa_ilvev_w +#define msa_ilvevq_s64 __builtin_msa_ilvev_d +#define msa_ilvodq_s8 __builtin_msa_ilvod_b +#define msa_ilvodq_s16 __builtin_msa_ilvod_h +#define msa_ilvodq_s32 __builtin_msa_ilvod_w +#define msa_ilvodq_s64 __builtin_msa_ilvod_d + +/* extq (r = (a || b); a concatenation b and get elements from index c) */ +#ifdef _MIPSEB +#define msa_extq_s8(a, b, c) \ +(__builtin_msa_vshf_b(__builtin_msa_subv_b((v16i8)((v2i64){0x1716151413121110, 0x1F1E1D1C1B1A1918}), __builtin_msa_fill_b(c)), a, b)) +#define msa_extq_s16(a, b, c) \ +(__builtin_msa_vshf_h(__builtin_msa_subv_h((v8i16)((v2i64){0x000B000A00090008, 0x000F000E000D000C}), __builtin_msa_fill_h(c)), a, b)) +#define msa_extq_s32(a, b, c) \ +(__builtin_msa_vshf_w(__builtin_msa_subv_w((v4i32)((v2i64){0x0000000500000004, 0x0000000700000006}), __builtin_msa_fill_w(c)), a, b)) +#define msa_extq_s64(a, b, c) \ +(__builtin_msa_vshf_d(__builtin_msa_subv_d((v2i64){0x0000000000000002, 0x0000000000000003}, __builtin_msa_fill_d(c)), a, b)) +#else +#define msa_extq_s8(a, b, c) \ +(__builtin_msa_vshf_b(__builtin_msa_addv_b((v16i8)((v2i64){0x0706050403020100, 0x0F0E0D0C0B0A0908}), __builtin_msa_fill_b(c)), b, a)) +#define msa_extq_s16(a, b, c) \ +(__builtin_msa_vshf_h(__builtin_msa_addv_h((v8i16)((v2i64){0x0003000200010000, 0x0007000600050004}), __builtin_msa_fill_h(c)), b, a)) +#define msa_extq_s32(a, b, c) \ +(__builtin_msa_vshf_w(__builtin_msa_addv_w((v4i32)((v2i64){0x0000000100000000, 0x0000000300000002}), __builtin_msa_fill_w(c)), b, a)) +#define msa_extq_s64(a, b, c) \ +(__builtin_msa_vshf_d(__builtin_msa_addv_d((v2i64){0x0000000000000000, 0x0000000000000001}, __builtin_msa_fill_d(c)), b, a)) +#endif /* _MIPSEB */ + +/* cvttruncq, cvttintq, cvtrintq */ +#define msa_cvttruncq_u32_f32 __builtin_msa_ftrunc_u_w +#define msa_cvttruncq_s32_f32 __builtin_msa_ftrunc_s_w +#define msa_cvttruncq_u64_f64 __builtin_msa_ftrunc_u_d +#define msa_cvttruncq_s64_f64 __builtin_msa_ftrunc_s_d +#define msa_cvttintq_u32_f32 __builtin_msa_ftint_u_w +#define msa_cvttintq_s32_f32 __builtin_msa_ftint_s_w +#define msa_cvttintq_u64_f64 __builtin_msa_ftint_u_d +#define msa_cvttintq_s64_f64 __builtin_msa_ftint_s_d +#define msa_cvtrintq_f32 __builtin_msa_frint_w +#define msa_cvtrintq_f64 __builtin_msa_frint_d + +/* cvtfintq, cvtfq */ +#define msa_cvtfintq_f32_u32 __builtin_msa_ffint_u_w +#define msa_cvtfintq_f32_s32 __builtin_msa_ffint_s_w +#define msa_cvtfintq_f64_u64 __builtin_msa_ffint_u_d +#define msa_cvtfintq_f64_s64 __builtin_msa_ffint_s_d +#define msa_cvtfq_f32_f64 __builtin_msa_fexdo_w +#define msa_cvtflq_f64_f32 __builtin_msa_fexupr_d +#define msa_cvtfhq_f64_f32 __builtin_msa_fexupl_d + +#define msa_addl_u8(a, b) ((v8u16)__builtin_msa_addv_h((v8i16)V8U8_2_V8I16(a), (v8i16)V8U8_2_V8I16(b))) +#define msa_addl_s8(a, b) (__builtin_msa_addv_h((v8i16)V8I8_2_V8I16(a), (v8i16)V8I8_2_V8I16(b))) +#define msa_addl_u16(a, b) ((v4u32)__builtin_msa_addv_w((v4i32)V4U16_2_V4I32(a), (v4i32)V4U16_2_V4I32(b))) +#define msa_addl_s16(a, b) (__builtin_msa_addv_w((v4i32)V4I16_2_V4I32(a), (v4i32)V4I16_2_V4I32(b))) +#define msa_subl_s16(a, b) (__builtin_msa_subv_w((v4i32)V4I16_2_V4I32(a), (v4i32)V4I16_2_V4I32(b))) +#define msa_recpeq_f32 __builtin_msa_frcp_w +#define msa_recpsq_f32(a, b) (__builtin_msa_fsub_w(msa_dupq_n_f32(2.0f), __builtin_msa_fmul_w(a, b))) + +#define MSA_INTERLEAVED_IMPL_LOAD2_STORE2(_Tp, _Tpv, _Tpvs, suffix, df, nlanes) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_ld2q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b) \ +{ \ + _Tpv v0 = msa_ld1q_##suffix(ptr); \ + _Tpv v1 = msa_ld1q_##suffix(ptr + nlanes); \ + *a = (_Tpv)__builtin_msa_pckev_##df((_Tpvs)v1, (_Tpvs)v0); \ + *b = (_Tpv)__builtin_msa_pckod_##df((_Tpvs)v1, (_Tpvs)v0); \ +} \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st2q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b) \ +{ \ + msa_st1q_##suffix(ptr, (_Tpv)__builtin_msa_ilvr_##df((_Tpvs)b, (_Tpvs)a)); \ + msa_st1q_##suffix(ptr + nlanes, (_Tpv)__builtin_msa_ilvl_##df((_Tpvs)b, (_Tpvs)a)); \ +} + +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(uint8_t, v16u8, v16i8, u8, b, 16) +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(int8_t, v16i8, v16i8, s8, b, 16) +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(uint16_t, v8u16, v8i16, u16, h, 8) +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(int16_t, v8i16, v8i16, s16, h, 8) +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(uint32_t, v4u32, v4i32, u32, w, 4) +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(int32_t, v4i32, v4i32, s32, w, 4) +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(float, v4f32, v4i32, f32, w, 4) +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(uint64_t, v2u64, v2i64, u64, d, 2) +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(int64_t, v2i64, v2i64, s64, d, 2) +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(double, v2f64, v2i64, f64, d, 2) + +#ifdef _MIPSEB +#define MSA_INTERLEAVED_IMPL_LOAD3_8(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_ld3q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c) \ +{ \ + _Tpv v0 = msa_ld1q_##suffix(ptr); \ + _Tpv v1 = msa_ld1q_##suffix(ptr + 16); \ + _Tpv v2 = msa_ld1q_##suffix(ptr + 32); \ + _Tpvs v3 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0704011F1F1F1F1F, 0x1F1C191613100D0A}), (_Tpvs)v0, (_Tpvs)v1); \ + *a = (_Tpv)__builtin_msa_vshf_b((_Tpvs)((v2i64){0x1716150E0B080502, 0x1F1E1D1C1B1A1918}), v3, (_Tpvs)v2); \ + v3 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0603001F1F1F1F1F, 0x1E1B1815120F0C09}), (_Tpvs)v0, (_Tpvs)v1); \ + *b = (_Tpv)__builtin_msa_vshf_b((_Tpvs)((v2i64){0x1716150D0A070401, 0x1F1E1D1C1B1A1918}), v3, (_Tpvs)v2); \ + v3 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x05021F1F1F1F1F1F, 0x1D1A1714110E0B08}), (_Tpvs)v0, (_Tpvs)v1); \ + *c = (_Tpv)__builtin_msa_vshf_b((_Tpvs)((v2i64){0x17160F0C09060300, 0x1F1E1D1C1B1A1918}), v3, (_Tpvs)v2); \ +} +#else +#define MSA_INTERLEAVED_IMPL_LOAD3_8(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_ld3q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c) \ +{ \ + _Tpv v0 = msa_ld1q_##suffix(ptr); \ + _Tpv v1 = msa_ld1q_##suffix(ptr + 16); \ + _Tpv v2 = msa_ld1q_##suffix(ptr + 32); \ + _Tpvs v3 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x15120F0C09060300, 0x00000000001E1B18}), (_Tpvs)v1, (_Tpvs)v0); \ + *a = (_Tpv)__builtin_msa_vshf_b((_Tpvs)((v2i64){0x0706050403020100, 0x1D1A1714110A0908}), (_Tpvs)v2, v3); \ + v3 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x1613100D0A070401, 0x00000000001F1C19}), (_Tpvs)v1, (_Tpvs)v0); \ + *b = (_Tpv)__builtin_msa_vshf_b((_Tpvs)((v2i64){0x0706050403020100, 0x1E1B1815120A0908}), (_Tpvs)v2, v3); \ + v3 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x1714110E0B080502, 0x0000000000001D1A}), (_Tpvs)v1, (_Tpvs)v0); \ + *c = (_Tpv)__builtin_msa_vshf_b((_Tpvs)((v2i64){0x0706050403020100, 0x1F1C191613100908}), (_Tpvs)v2, v3); \ +} +#endif + +MSA_INTERLEAVED_IMPL_LOAD3_8(uint8_t, v16u8, v16i8, u8) +MSA_INTERLEAVED_IMPL_LOAD3_8(int8_t, v16i8, v16i8, s8) + +#ifdef _MIPSEB +#define MSA_INTERLEAVED_IMPL_LOAD3_16(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_ld3q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c) \ +{ \ + _Tpv v0 = msa_ld1q_##suffix(ptr); \ + _Tpv v1 = msa_ld1q_##suffix(ptr + 8); \ + _Tpv v2 = msa_ld1q_##suffix(ptr + 16); \ + _Tpvs v3 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x00030000000F000F, 0x000F000C00090006}), (_Tpvs)v1, (_Tpvs)v0); \ + *a = (_Tpv)__builtin_msa_vshf_h((_Tpvs)((v2i64){0x000B000A00050002, 0x000F000E000D000C}), (_Tpvs)v2, v3); \ + v3 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0002000F000F000F, 0x000E000B00080005}), (_Tpvs)v1, (_Tpvs)v0); \ + *b = (_Tpv)__builtin_msa_vshf_h((_Tpvs)((v2i64){0x000B000700040001, 0x000F000E000D000C}), (_Tpvs)v2, v3); \ + v3 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0001000F000F000F, 0x000D000A00070004}), (_Tpvs)v1, (_Tpvs)v0); \ + *c = (_Tpv)__builtin_msa_vshf_h((_Tpvs)((v2i64){0x000B000600030000, 0x000F000E000D000C}), (_Tpvs)v2, v3); \ +} +#else +#define MSA_INTERLEAVED_IMPL_LOAD3_16(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_ld3q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c) \ +{ \ + _Tpv v0 = msa_ld1q_##suffix(ptr); \ + _Tpv v1 = msa_ld1q_##suffix(ptr + 8); \ + _Tpv v2 = msa_ld1q_##suffix(ptr + 16); \ + _Tpvs v3 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0009000600030000, 0x00000000000F000C}), (_Tpvs)v1, (_Tpvs)v0); \ + *a = (_Tpv)__builtin_msa_vshf_h((_Tpvs)((v2i64){0x0003000200010000, 0x000D000A00050004}), (_Tpvs)v2, v3); \ + v3 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000A000700040001, 0x000000000000000D}), (_Tpvs)v1, (_Tpvs)v0); \ + *b = (_Tpv)__builtin_msa_vshf_h((_Tpvs)((v2i64){0x0003000200010000, 0x000E000B00080004}), (_Tpvs)v2, v3); \ + v3 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000B000800050002, 0x000000000000000E}), (_Tpvs)v1, (_Tpvs)v0); \ + *c = (_Tpv)__builtin_msa_vshf_h((_Tpvs)((v2i64){0x0003000200010000, 0x000F000C00090004}), (_Tpvs)v2, v3); \ +} +#endif + +MSA_INTERLEAVED_IMPL_LOAD3_16(uint16_t, v8u16, v8i16, u16) +MSA_INTERLEAVED_IMPL_LOAD3_16(int16_t, v8i16, v8i16, s16) + +#define MSA_INTERLEAVED_IMPL_LOAD3_32(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_ld3q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c) \ +{ \ + _Tpv v00 = msa_ld1q_##suffix(ptr); \ + _Tpv v01 = msa_ld1q_##suffix(ptr + 4); \ + _Tpv v02 = msa_ld1q_##suffix(ptr + 8); \ + _Tpvs v10 = __builtin_msa_ilvr_w((_Tpvs)__builtin_msa_ilvl_d((v2i64)v01, (v2i64)v01), (_Tpvs)v00); \ + _Tpvs v11 = __builtin_msa_ilvr_w((_Tpvs)v02, (_Tpvs)__builtin_msa_ilvl_d((v2i64)v00, (v2i64)v00)); \ + _Tpvs v12 = __builtin_msa_ilvr_w((_Tpvs)__builtin_msa_ilvl_d((v2i64)v02, (v2i64)v02), (_Tpvs)v01); \ + *a = (_Tpv)__builtin_msa_ilvr_w((_Tpvs)__builtin_msa_ilvl_d((v2i64)v11, (v2i64)v11), v10); \ + *b = (_Tpv)__builtin_msa_ilvr_w(v12, (_Tpvs)__builtin_msa_ilvl_d((v2i64)v10, (v2i64)v10)); \ + *c = (_Tpv)__builtin_msa_ilvr_w((_Tpvs)__builtin_msa_ilvl_d((v2i64)v12, (v2i64)v12), v11); \ +} + +MSA_INTERLEAVED_IMPL_LOAD3_32(uint32_t, v4u32, v4i32, u32) +MSA_INTERLEAVED_IMPL_LOAD3_32(int32_t, v4i32, v4i32, s32) +MSA_INTERLEAVED_IMPL_LOAD3_32(float, v4f32, v4i32, f32) + +#define MSA_INTERLEAVED_IMPL_LOAD3_64(_Tp, _Tpv, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_ld3q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c) \ +{ \ + *((_Tp*)a) = *ptr; *((_Tp*)b) = *(ptr + 1); *((_Tp*)c) = *(ptr + 2); \ + *((_Tp*)a + 1) = *(ptr + 3); *((_Tp*)b + 1) = *(ptr + 4); *((_Tp*)c + 1) = *(ptr + 5); \ +} + +MSA_INTERLEAVED_IMPL_LOAD3_64(uint64_t, v2u64, u64) +MSA_INTERLEAVED_IMPL_LOAD3_64(int64_t, v2i64, s64) +MSA_INTERLEAVED_IMPL_LOAD3_64(double, v2f64, f64) + +#ifdef _MIPSEB +#define MSA_INTERLEAVED_IMPL_STORE3_8(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \ +{ \ + _Tpvs v0 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0F0E0D0C0B1F1F1F, 0x1F1E1D1C1B1A1F1F}), (_Tpvs)b, (_Tpvs)a); \ + _Tpvs v1 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0D1C140C1B130B1A, 0x1F170F1E160E1D15}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0A09080706051F1F, 0x19181716151F1F1F}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x1D14071C13061B12, 0x170A1F16091E1508}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 16, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x04030201001F1F1F, 0x14131211101F1F1F}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x15021C14011B1300, 0x051F17041E16031D}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 32, (_Tpv)v1); \ +} +#else +#define MSA_INTERLEAVED_IMPL_STORE3_8(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \ +{ \ + _Tpvs v0 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0000050403020100, 0x0000001413121110}), (_Tpvs)b, (_Tpvs)a); \ + _Tpvs v1 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0A02110901100800, 0x05140C04130B0312}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0000000A09080706, 0x00001A1918171615}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x170A011609001508, 0x0D04190C03180B02}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 16, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0000000F0E0D0C0B, 0x0000001F1E1D1C1B}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x021C09011B08001A, 0x1F0C041E0B031D0A}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 32, (_Tpv)v1); \ +} +#endif + +MSA_INTERLEAVED_IMPL_STORE3_8(uint8_t, v16u8, v16i8, u8) +MSA_INTERLEAVED_IMPL_STORE3_8(int8_t, v16i8, v16i8, s8) + +#ifdef _MIPSEB +#define MSA_INTERLEAVED_IMPL_STORE3_16(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \ +{ \ + _Tpvs v0 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000700060005000F, 0x000F000E000D000F}), (_Tpvs)b, (_Tpvs)a); \ + _Tpvs v1 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000A0006000D0009, 0x000F000B0007000E}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x00040003000F000F, 0x000C000B000A000F}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000E000A0003000D, 0x0005000F000B0004}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 8, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000200010000000F, 0x00090008000F000F}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0001000E00090000, 0x000B0002000F000A}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 16, (_Tpv)v1); \ +} +#else +#define MSA_INTERLEAVED_IMPL_STORE3_16(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \ +{ \ + _Tpvs v0 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0000000200010000, 0x0000000A00090008}), (_Tpvs)b, (_Tpvs)a); \ + _Tpvs v1 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0001000800040000, 0x0006000200090005}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0000000500040003, 0x00000000000C000B}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000B00040000000A, 0x0002000C00050001}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 8, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0000000000070006, 0x0000000F000E000D}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x00050000000D0004, 0x000F00060001000E}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 16, (_Tpv)v1); \ +} +#endif + +MSA_INTERLEAVED_IMPL_STORE3_16(uint16_t, v8u16, v8i16, u16) +MSA_INTERLEAVED_IMPL_STORE3_16(int16_t, v8i16, v8i16, s16) + +#ifdef _MIPSEB +#define MSA_INTERLEAVED_IMPL_STORE3_32(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \ +{ \ + _Tpvs v0 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000300000007, 0x0000000700000006}), (_Tpvs)b, (_Tpvs)a); \ + _Tpvs v1 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000300000006, 0x0000000700000005}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000200000001, 0x0000000500000007}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000700000004, 0x0000000500000002}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 4, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000000000007, 0x0000000400000007}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000500000000, 0x0000000100000007}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 8, (_Tpv)v1); \ +} +#else +#define MSA_INTERLEAVED_IMPL_STORE3_32(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \ +{ \ + _Tpvs v0 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000100000000, 0x0000000000000004}), (_Tpvs)b, (_Tpvs)a); \ + _Tpvs v1 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000200000000, 0x0000000100000004}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000000000002, 0x0000000600000005}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000500000002, 0x0000000300000000}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 4, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000000000003, 0x0000000000000007}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000000000006, 0x0000000700000002}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 8, (_Tpv)v1); \ +} +#endif + +MSA_INTERLEAVED_IMPL_STORE3_32(uint32_t, v4u32, v4i32, u32) +MSA_INTERLEAVED_IMPL_STORE3_32(int32_t, v4i32, v4i32, s32) +MSA_INTERLEAVED_IMPL_STORE3_32(float, v4f32, v4i32, f32) + +#define MSA_INTERLEAVED_IMPL_STORE3_64(_Tp, _Tpv, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \ +{ \ + *ptr = a[0]; *(ptr + 1) = b[0]; *(ptr + 2) = c[0]; \ + *(ptr + 3) = a[1]; *(ptr + 4) = b[1]; *(ptr + 5) = c[1]; \ +} + +MSA_INTERLEAVED_IMPL_STORE3_64(uint64_t, v2u64, u64) +MSA_INTERLEAVED_IMPL_STORE3_64(int64_t, v2i64, s64) +MSA_INTERLEAVED_IMPL_STORE3_64(double, v2f64, f64) + +#define MSA_INTERLEAVED_IMPL_LOAD4_STORE4(_Tp, _Tpv, _Tpvs, suffix, df, nlanes) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_ld4q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c, _Tpv* d) \ +{ \ + _Tpv v0 = msa_ld1q_##suffix(ptr); \ + _Tpv v1 = msa_ld1q_##suffix(ptr + nlanes); \ + _Tpv v2 = msa_ld1q_##suffix(ptr + nlanes * 2); \ + _Tpv v3 = msa_ld1q_##suffix(ptr + nlanes * 3); \ + _Tpvs t0 = __builtin_msa_pckev_##df((_Tpvs)v1, (_Tpvs)v0); \ + _Tpvs t1 = __builtin_msa_pckev_##df((_Tpvs)v3, (_Tpvs)v2); \ + _Tpvs t2 = __builtin_msa_pckod_##df((_Tpvs)v1, (_Tpvs)v0); \ + _Tpvs t3 = __builtin_msa_pckod_##df((_Tpvs)v3, (_Tpvs)v2); \ + *a = (_Tpv)__builtin_msa_pckev_##df(t1, t0); \ + *b = (_Tpv)__builtin_msa_pckev_##df(t3, t2); \ + *c = (_Tpv)__builtin_msa_pckod_##df(t1, t0); \ + *d = (_Tpv)__builtin_msa_pckod_##df(t3, t2); \ +} \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st4q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c, const _Tpv d) \ +{ \ + _Tpvs v0 = __builtin_msa_ilvr_##df((_Tpvs)c, (_Tpvs)a); \ + _Tpvs v1 = __builtin_msa_ilvr_##df((_Tpvs)d, (_Tpvs)b); \ + _Tpvs v2 = __builtin_msa_ilvl_##df((_Tpvs)c, (_Tpvs)a); \ + _Tpvs v3 = __builtin_msa_ilvl_##df((_Tpvs)d, (_Tpvs)b); \ + msa_st1q_##suffix(ptr, (_Tpv)__builtin_msa_ilvr_##df(v1, v0)); \ + msa_st1q_##suffix(ptr + nlanes, (_Tpv)__builtin_msa_ilvl_##df(v1, v0)); \ + msa_st1q_##suffix(ptr + 2 * nlanes, (_Tpv)__builtin_msa_ilvr_##df(v3, v2)); \ + msa_st1q_##suffix(ptr + 3 * nlanes, (_Tpv)__builtin_msa_ilvl_##df(v3, v2)); \ +} + +MSA_INTERLEAVED_IMPL_LOAD4_STORE4(uint8_t, v16u8, v16i8, u8, b, 16) +MSA_INTERLEAVED_IMPL_LOAD4_STORE4(int8_t, v16i8, v16i8, s8, b, 16) +MSA_INTERLEAVED_IMPL_LOAD4_STORE4(uint16_t, v8u16, v8i16, u16, h, 8) +MSA_INTERLEAVED_IMPL_LOAD4_STORE4(int16_t, v8i16, v8i16, s16, h, 8) +MSA_INTERLEAVED_IMPL_LOAD4_STORE4(uint32_t, v4u32, v4i32, u32, w, 4) +MSA_INTERLEAVED_IMPL_LOAD4_STORE4(int32_t, v4i32, v4i32, s32, w, 4) +MSA_INTERLEAVED_IMPL_LOAD4_STORE4(float, v4f32, v4i32, f32, w, 4) + +#define MSA_INTERLEAVED_IMPL_LOAD4_STORE4_64(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_ld4q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c, _Tpv* d) \ +{ \ + _Tpv v0 = msa_ld1q_##suffix(ptr); \ + _Tpv v1 = msa_ld1q_##suffix(ptr + 2); \ + _Tpv v2 = msa_ld1q_##suffix(ptr + 4); \ + _Tpv v3 = msa_ld1q_##suffix(ptr + 6); \ + *a = (_Tpv)__builtin_msa_ilvr_d((_Tpvs)v2, (_Tpvs)v0); \ + *b = (_Tpv)__builtin_msa_ilvl_d((_Tpvs)v2, (_Tpvs)v0); \ + *c = (_Tpv)__builtin_msa_ilvr_d((_Tpvs)v3, (_Tpvs)v1); \ + *d = (_Tpv)__builtin_msa_ilvl_d((_Tpvs)v3, (_Tpvs)v1); \ +} \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st4q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c, const _Tpv d) \ +{ \ + msa_st1q_##suffix(ptr, (_Tpv)__builtin_msa_ilvr_d((_Tpvs)b, (_Tpvs)a)); \ + msa_st1q_##suffix(ptr + 2, (_Tpv)__builtin_msa_ilvr_d((_Tpvs)d, (_Tpvs)c)); \ + msa_st1q_##suffix(ptr + 4, (_Tpv)__builtin_msa_ilvl_d((_Tpvs)b, (_Tpvs)a)); \ + msa_st1q_##suffix(ptr + 6, (_Tpv)__builtin_msa_ilvl_d((_Tpvs)d, (_Tpvs)c)); \ +} + +MSA_INTERLEAVED_IMPL_LOAD4_STORE4_64(uint64_t, v2u64, v2i64, u64) +MSA_INTERLEAVED_IMPL_LOAD4_STORE4_64(int64_t, v2i64, v2i64, s64) +MSA_INTERLEAVED_IMPL_LOAD4_STORE4_64(double, v2f64, v2i64, f64) + +__extension__ extern __inline v8i16 +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +msa_qdmulhq_n_s16(v8i16 a, int16_t b) +{ + v8i16 a_lo, a_hi; + ILVRL_H2_SH(a, msa_dupq_n_s16(0), a_lo, a_hi); + return msa_packr_s32(msa_shlq_n_s32(msa_mulq_s32(msa_paddlq_s16(a_lo), msa_dupq_n_s32(b)), 1), + msa_shlq_n_s32(msa_mulq_s32(msa_paddlq_s16(a_hi), msa_dupq_n_s32(b)), 1), 16); +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /*__mips_msa*/ +#endif /* OPENCV_CORE_MSA_MACROS_H */ diff --git a/IPL/include/opencv/opencv2/core/hal/simd_utils.impl.hpp b/IPL/include/opencv/opencv2/core/hal/simd_utils.impl.hpp new file mode 100644 index 0000000..fff8f94 --- /dev/null +++ b/IPL/include/opencv/opencv2/core/hal/simd_utils.impl.hpp @@ -0,0 +1,146 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +// This header is not standalone. Don't include directly, use "intrin.hpp" instead. +#ifdef OPENCV_HAL_INTRIN_HPP // defined in intrin.hpp + + +#if CV_SIMD128 || CV_SIMD128_CPP + +template struct Type2Vec128_Traits; +#define CV_INTRIN_DEF_TYPE2VEC128_TRAITS(type_, vec_type_) \ + template<> struct Type2Vec128_Traits \ + { \ + typedef vec_type_ vec_type; \ + } + +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(uchar, v_uint8x16); +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(schar, v_int8x16); +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(ushort, v_uint16x8); +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(short, v_int16x8); +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(unsigned, v_uint32x4); +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(int, v_int32x4); +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(float, v_float32x4); +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(uint64, v_uint64x2); +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(int64, v_int64x2); +#if CV_SIMD128_64F +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(double, v_float64x2); +#endif + +template static inline +typename Type2Vec128_Traits<_T>::vec_type v_setall(const _T& a); + +template<> inline Type2Vec128_Traits< uchar>::vec_type v_setall< uchar>(const uchar& a) { return v_setall_u8(a); } +template<> inline Type2Vec128_Traits< schar>::vec_type v_setall< schar>(const schar& a) { return v_setall_s8(a); } +template<> inline Type2Vec128_Traits::vec_type v_setall(const ushort& a) { return v_setall_u16(a); } +template<> inline Type2Vec128_Traits< short>::vec_type v_setall< short>(const short& a) { return v_setall_s16(a); } +template<> inline Type2Vec128_Traits< uint>::vec_type v_setall< uint>(const uint& a) { return v_setall_u32(a); } +template<> inline Type2Vec128_Traits< int>::vec_type v_setall< int>(const int& a) { return v_setall_s32(a); } +template<> inline Type2Vec128_Traits::vec_type v_setall(const uint64& a) { return v_setall_u64(a); } +template<> inline Type2Vec128_Traits< int64>::vec_type v_setall< int64>(const int64& a) { return v_setall_s64(a); } +template<> inline Type2Vec128_Traits< float>::vec_type v_setall< float>(const float& a) { return v_setall_f32(a); } +#if CV_SIMD128_64F +template<> inline Type2Vec128_Traits::vec_type v_setall(const double& a) { return v_setall_f64(a); } +#endif + +#endif // SIMD128 + + +#if CV_SIMD256 + +template struct Type2Vec256_Traits; +#define CV_INTRIN_DEF_TYPE2VEC256_TRAITS(type_, vec_type_) \ + template<> struct Type2Vec256_Traits \ + { \ + typedef vec_type_ vec_type; \ + } + +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(uchar, v_uint8x32); +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(schar, v_int8x32); +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(ushort, v_uint16x16); +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(short, v_int16x16); +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(unsigned, v_uint32x8); +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(int, v_int32x8); +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(float, v_float32x8); +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(uint64, v_uint64x4); +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(int64, v_int64x4); +#if CV_SIMD256_64F +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(double, v_float64x4); +#endif + +template static inline +typename Type2Vec256_Traits<_T>::vec_type v256_setall(const _T& a); + +template<> inline Type2Vec256_Traits< uchar>::vec_type v256_setall< uchar>(const uchar& a) { return v256_setall_u8(a); } +template<> inline Type2Vec256_Traits< schar>::vec_type v256_setall< schar>(const schar& a) { return v256_setall_s8(a); } +template<> inline Type2Vec256_Traits::vec_type v256_setall(const ushort& a) { return v256_setall_u16(a); } +template<> inline Type2Vec256_Traits< short>::vec_type v256_setall< short>(const short& a) { return v256_setall_s16(a); } +template<> inline Type2Vec256_Traits< uint>::vec_type v256_setall< uint>(const uint& a) { return v256_setall_u32(a); } +template<> inline Type2Vec256_Traits< int>::vec_type v256_setall< int>(const int& a) { return v256_setall_s32(a); } +template<> inline Type2Vec256_Traits::vec_type v256_setall(const uint64& a) { return v256_setall_u64(a); } +template<> inline Type2Vec256_Traits< int64>::vec_type v256_setall< int64>(const int64& a) { return v256_setall_s64(a); } +template<> inline Type2Vec256_Traits< float>::vec_type v256_setall< float>(const float& a) { return v256_setall_f32(a); } +#if CV_SIMD256_64F +template<> inline Type2Vec256_Traits::vec_type v256_setall(const double& a) { return v256_setall_f64(a); } +#endif + +#endif // SIMD256 + + +#if CV_SIMD512 + +template struct Type2Vec512_Traits; +#define CV_INTRIN_DEF_TYPE2VEC512_TRAITS(type_, vec_type_) \ + template<> struct Type2Vec512_Traits \ + { \ + typedef vec_type_ vec_type; \ + } + +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(uchar, v_uint8x64); +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(schar, v_int8x64); +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(ushort, v_uint16x32); +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(short, v_int16x32); +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(unsigned, v_uint32x16); +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(int, v_int32x16); +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(float, v_float32x16); +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(uint64, v_uint64x8); +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(int64, v_int64x8); +#if CV_SIMD512_64F +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(double, v_float64x8); +#endif + +template static inline +typename Type2Vec512_Traits<_T>::vec_type v512_setall(const _T& a); + +template<> inline Type2Vec512_Traits< uchar>::vec_type v512_setall< uchar>(const uchar& a) { return v512_setall_u8(a); } +template<> inline Type2Vec512_Traits< schar>::vec_type v512_setall< schar>(const schar& a) { return v512_setall_s8(a); } +template<> inline Type2Vec512_Traits::vec_type v512_setall(const ushort& a) { return v512_setall_u16(a); } +template<> inline Type2Vec512_Traits< short>::vec_type v512_setall< short>(const short& a) { return v512_setall_s16(a); } +template<> inline Type2Vec512_Traits< uint>::vec_type v512_setall< uint>(const uint& a) { return v512_setall_u32(a); } +template<> inline Type2Vec512_Traits< int>::vec_type v512_setall< int>(const int& a) { return v512_setall_s32(a); } +template<> inline Type2Vec512_Traits::vec_type v512_setall(const uint64& a) { return v512_setall_u64(a); } +template<> inline Type2Vec512_Traits< int64>::vec_type v512_setall< int64>(const int64& a) { return v512_setall_s64(a); } +template<> inline Type2Vec512_Traits< float>::vec_type v512_setall< float>(const float& a) { return v512_setall_f32(a); } +#if CV_SIMD512_64F +template<> inline Type2Vec512_Traits::vec_type v512_setall(const double& a) { return v512_setall_f64(a); } +#endif + +#endif // SIMD512 + + +#if CV_SIMD_WIDTH == 16 +template static inline +typename Type2Vec128_Traits<_T>::vec_type vx_setall(const _T& a) { return v_setall(a); } +#elif CV_SIMD_WIDTH == 32 +template static inline +typename Type2Vec256_Traits<_T>::vec_type vx_setall(const _T& a) { return v256_setall(a); } +#elif CV_SIMD_WIDTH == 64 +template static inline +typename Type2Vec512_Traits<_T>::vec_type vx_setall(const _T& a) { return v512_setall(a); } +#else +#error "Build configuration error, unsupported CV_SIMD_WIDTH" +#endif + + +#endif // OPENCV_HAL_INTRIN_HPP diff --git a/IPL/include/opencv/opencv2/core/ippasync.hpp b/IPL/include/opencv/opencv2/core/ippasync.hpp deleted file mode 100644 index 4de8611..0000000 --- a/IPL/include/opencv/opencv2/core/ippasync.hpp +++ /dev/null @@ -1,195 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2015, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Copyright (C) 2015, Itseez Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_CORE_IPPASYNC_HPP__ -#define __OPENCV_CORE_IPPASYNC_HPP__ - -#ifdef HAVE_IPP_A - -#include "opencv2/core.hpp" -#include -#include - -namespace cv -{ - -namespace hpp -{ - -/** @addtogroup core_ipp -This section describes conversion between OpenCV and [Intel® IPP Asynchronous -C/C++](http://software.intel.com/en-us/intel-ipp-preview) library. [Getting Started -Guide](http://registrationcenter.intel.com/irc_nas/3727/ipp_async_get_started.htm) help you to -install the library, configure header and library build paths. - */ -//! @{ - - //! convert OpenCV data type to hppDataType - inline int toHppType(const int cvType) - { - int depth = CV_MAT_DEPTH(cvType); - int hppType = depth == CV_8U ? HPP_DATA_TYPE_8U : - depth == CV_16U ? HPP_DATA_TYPE_16U : - depth == CV_16S ? HPP_DATA_TYPE_16S : - depth == CV_32S ? HPP_DATA_TYPE_32S : - depth == CV_32F ? HPP_DATA_TYPE_32F : - depth == CV_64F ? HPP_DATA_TYPE_64F : -1; - CV_Assert( hppType >= 0 ); - return hppType; - } - - //! convert hppDataType to OpenCV data type - inline int toCvType(const int hppType) - { - int cvType = hppType == HPP_DATA_TYPE_8U ? CV_8U : - hppType == HPP_DATA_TYPE_16U ? CV_16U : - hppType == HPP_DATA_TYPE_16S ? CV_16S : - hppType == HPP_DATA_TYPE_32S ? CV_32S : - hppType == HPP_DATA_TYPE_32F ? CV_32F : - hppType == HPP_DATA_TYPE_64F ? CV_64F : -1; - CV_Assert( cvType >= 0 ); - return cvType; - } - - /** @brief Convert hppiMatrix to Mat. - - This function allocates and initializes new matrix (if needed) that has the same size and type as - input matrix. Supports CV_8U, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F. - @param src input hppiMatrix. - @param dst output matrix. - @param accel accelerator instance (see hpp::getHpp for the list of acceleration framework types). - @param cn number of channels. - */ - inline void copyHppToMat(hppiMatrix* src, Mat& dst, hppAccel accel, int cn) - { - hppDataType type; - hpp32u width, height; - hppStatus sts; - - if (src == NULL) - return dst.release(); - - sts = hppiInquireMatrix(src, &type, &width, &height); - - CV_Assert( sts == HPP_STATUS_NO_ERROR); - - int matType = CV_MAKETYPE(toCvType(type), cn); - - CV_Assert(width%cn == 0); - - width /= cn; - - dst.create((int)height, (int)width, (int)matType); - - size_t newSize = (size_t)(height*(hpp32u)(dst.step)); - - sts = hppiGetMatrixData(accel,src,(hpp32u)(dst.step),dst.data,&newSize); - - CV_Assert( sts == HPP_STATUS_NO_ERROR); - } - - /** @brief Create Mat from hppiMatrix. - - This function allocates and initializes the Mat that has the same size and type as input matrix. - Supports CV_8U, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F. - @param src input hppiMatrix. - @param accel accelerator instance (see hpp::getHpp for the list of acceleration framework types). - @param cn number of channels. - @sa howToUseIPPAconversion, hpp::copyHppToMat, hpp::getHpp. - */ - inline Mat getMat(hppiMatrix* src, hppAccel accel, int cn) - { - Mat dst; - copyHppToMat(src, dst, accel, cn); - return dst; - } - - /** @brief Create hppiMatrix from Mat. - - This function allocates and initializes the hppiMatrix that has the same size and type as input - matrix, returns the hppiMatrix*. - - If you want to use zero-copy for GPU you should to have 4KB aligned matrix data. See details - [hppiCreateSharedMatrix](http://software.intel.com/ru-ru/node/501697). - - Supports CV_8U, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F. - - @note The hppiMatrix pointer to the image buffer in system memory refers to the src.data. Control - the lifetime of the matrix and don't change its data, if there is no special need. - @param src input matrix. - @param accel accelerator instance. Supports type: - - **HPP_ACCEL_TYPE_CPU** - accelerated by optimized CPU instructions. - - **HPP_ACCEL_TYPE_GPU** - accelerated by GPU programmable units or fixed-function - accelerators. - - **HPP_ACCEL_TYPE_ANY** - any acceleration or no acceleration available. - @sa howToUseIPPAconversion, hpp::getMat - */ - inline hppiMatrix* getHpp(const Mat& src, hppAccel accel) - { - int htype = toHppType(src.type()); - int cn = src.channels(); - - CV_Assert(src.data); - hppAccelType accelType = hppQueryAccelType(accel); - - if (accelType!=HPP_ACCEL_TYPE_CPU) - { - hpp32u pitch, size; - hppQueryMatrixAllocParams(accel, src.cols*cn, src.rows, htype, &pitch, &size); - if (pitch!=0 && size!=0) - if ((int)(src.data)%4096==0 && pitch==(hpp32u)(src.step)) - { - return hppiCreateSharedMatrix(htype, src.cols*cn, src.rows, src.data, pitch, size); - } - } - - return hppiCreateMatrix(htype, src.cols*cn, src.rows, src.data, (hpp32s)(src.step));; - } - -//! @} -}} - -#endif - -#endif diff --git a/IPL/include/opencv/opencv2/core/mat.hpp b/IPL/include/opencv/opencv2/core/mat.hpp index d554663..adabeca 100644 --- a/IPL/include/opencv/opencv2/core/mat.hpp +++ b/IPL/include/opencv/opencv2/core/mat.hpp @@ -41,8 +41,8 @@ // //M*/ -#ifndef __OPENCV_CORE_MAT_HPP__ -#define __OPENCV_CORE_MAT_HPP__ +#ifndef OPENCV_CORE_MAT_HPP +#define OPENCV_CORE_MAT_HPP #ifndef __cplusplus # error mat.hpp header must be compiled as C++ @@ -53,14 +53,20 @@ #include "opencv2/core/bufferpool.hpp" +#include + namespace cv { //! @addtogroup core_basic //! @{ -enum { ACCESS_READ=1<<24, ACCESS_WRITE=1<<25, +enum AccessFlag { ACCESS_READ=1<<24, ACCESS_WRITE=1<<25, ACCESS_RW=3<<24, ACCESS_MASK=ACCESS_RW, ACCESS_FAST=1<<26 }; +CV_ENUM_FLAGS(AccessFlag) +__CV_ENUM_FLAGS_BITWISE_AND(AccessFlag, int, AccessFlag) + +CV__DEBUG_NS_BEGIN class CV_EXPORTS _OutputArray; @@ -73,8 +79,8 @@ It is defined as: typedef const _InputArray& InputArray; @endcode where _InputArray is a class that can be constructed from `Mat`, `Mat_`, `Matx`, -`std::vector`, `std::vector >` or `std::vector`. It can also be constructed -from a matrix expression. +`std::vector`, `std::vector >`, `std::vector`, `std::vector >`, +`UMat`, `std::vector` or `double`. It can also be constructed from a matrix expression. Since this is mostly implementation-level class, and its interface may change in future versions, we do not describe it in details. There are a few key things, though, that should be kept in mind: @@ -142,11 +148,17 @@ synonym is needed to generate Python/Java etc. wrappers properly. At the functio level their use is similar, but _InputArray::getMat(idx) should be used to get header for the idx-th component of the outer vector and _InputArray::size().area() should be used to find the number of components (vectors/matrices) of the outer vector. + +In general, type support is limited to cv::Mat types. Other types are forbidden. +But in some cases we need to support passing of custom non-general Mat types, like arrays of cv::KeyPoint, cv::DMatch, etc. +This data is not intended to be interpreted as an image data, or processed somehow like regular cv::Mat. +To pass such custom type use rawIn() / rawOut() / rawInOut() wrappers. +Custom type is wrapped as Mat-compatible `CV_8UC` values (N = sizeof(T), N <= CV_CN_MAX). */ class CV_EXPORTS _InputArray { public: - enum { + enum KindFlag { KIND_SHIFT = 16, FIXED_TYPE = 0x8000 << KIND_SHIFT, FIXED_SIZE = 0x4000 << KIND_SHIFT, @@ -165,7 +177,9 @@ class CV_EXPORTS _InputArray UMAT =10 << KIND_SHIFT, STD_VECTOR_UMAT =11 << KIND_SHIFT, STD_BOOL_VECTOR =12 << KIND_SHIFT, - STD_VECTOR_CUDA_GPU_MAT = 13 << KIND_SHIFT + STD_VECTOR_CUDA_GPU_MAT = 13 << KIND_SHIFT, + STD_ARRAY =14 << KIND_SHIFT, + STD_ARRAY_MAT =15 << KIND_SHIFT }; _InputArray(); @@ -177,6 +191,7 @@ class CV_EXPORTS _InputArray template _InputArray(const std::vector<_Tp>& vec); _InputArray(const std::vector& vec); template _InputArray(const std::vector >& vec); + _InputArray(const std::vector >&) = delete; // not supported template _InputArray(const std::vector >& vec); template _InputArray(const _Tp* vec, int n); template _InputArray(const Matx<_Tp, m, n>& matx); @@ -189,6 +204,12 @@ class CV_EXPORTS _InputArray _InputArray(const UMat& um); _InputArray(const std::vector& umv); + template _InputArray(const std::array<_Tp, _Nm>& arr); + template _InputArray(const std::array& arr); + + template static _InputArray rawIn(const std::vector<_Tp>& vec); + template static _InputArray rawIn(const std::array<_Tp, _Nm>& arr); + Mat getMat(int idx=-1) const; Mat getMat_(int idx=-1) const; UMat getUMat(int idx=-1) const; @@ -202,7 +223,7 @@ class CV_EXPORTS _InputArray void* getObj() const; Size getSz() const; - int kind() const; + _InputArray::KindFlag kind() const; int dims(int i=-1) const; int cols(int i=-1) const; int rows(int i=-1) const; @@ -226,6 +247,7 @@ class CV_EXPORTS _InputArray bool isUMatVector() const; bool isMatx() const; bool isVector() const; + bool isGpuMat() const; bool isGpuMatVector() const; ~_InputArray(); @@ -237,7 +259,8 @@ class CV_EXPORTS _InputArray void init(int _flags, const void* _obj); void init(int _flags, const void* _obj, Size _sz); }; - +CV_ENUM_FLAGS(_InputArray::KindFlag) +__CV_ENUM_FLAGS_BITWISE_AND(_InputArray::KindFlag, int, _InputArray::KindFlag) /** @brief This type is very similar to InputArray except that it is used for input/output and output function parameters. @@ -267,7 +290,7 @@ There are several synonyms for OutputArray that are used to assist automatic Pyt class CV_EXPORTS _OutputArray : public _InputArray { public: - enum + enum DepthMask { DEPTH_MASK_8U = 1 << CV_8U, DEPTH_MASK_8S = 1 << CV_8S, @@ -276,8 +299,10 @@ class CV_EXPORTS _OutputArray : public _InputArray DEPTH_MASK_32S = 1 << CV_32S, DEPTH_MASK_32F = 1 << CV_32F, DEPTH_MASK_64F = 1 << CV_64F, + DEPTH_MASK_16F = 1 << CV_16F, DEPTH_MASK_ALL = (DEPTH_MASK_64F<<1)-1, DEPTH_MASK_ALL_BUT_8S = DEPTH_MASK_ALL & ~DEPTH_MASK_8S, + DEPTH_MASK_ALL_16F = (DEPTH_MASK_16F<<1)-1, DEPTH_MASK_FLT = DEPTH_MASK_32F + DEPTH_MASK_64F }; @@ -291,8 +316,9 @@ class CV_EXPORTS _OutputArray : public _InputArray _OutputArray(cuda::HostMem& cuda_mem); template _OutputArray(cudev::GpuMat_<_Tp>& m); template _OutputArray(std::vector<_Tp>& vec); - _OutputArray(std::vector& vec); + _OutputArray(std::vector& vec) = delete; // not supported template _OutputArray(std::vector >& vec); + _OutputArray(std::vector >&) = delete; // not supported template _OutputArray(std::vector >& vec); template _OutputArray(Mat_<_Tp>& m); template _OutputArray(_Tp* vec, int n); @@ -316,6 +342,14 @@ class CV_EXPORTS _OutputArray : public _InputArray _OutputArray(const UMat& m); _OutputArray(const std::vector& vec); + template _OutputArray(std::array<_Tp, _Nm>& arr); + template _OutputArray(const std::array<_Tp, _Nm>& arr); + template _OutputArray(std::array& arr); + template _OutputArray(const std::array& arr); + + template static _OutputArray rawOut(std::vector<_Tp>& vec); + template static _OutputArray rawOut(std::array<_Tp, _Nm>& arr); + bool fixedSize() const; bool fixedType() const; bool needed() const; @@ -325,9 +359,9 @@ class CV_EXPORTS _OutputArray : public _InputArray std::vector& getGpuMatVecRef() const; ogl::Buffer& getOGlBufferRef() const; cuda::HostMem& getHostMemRef() const; - void create(Size sz, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const; - void create(int rows, int cols, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const; - void create(int dims, const int* size, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const; + void create(Size sz, int type, int i=-1, bool allowTransposed=false, _OutputArray::DepthMask fixedDepthMask=static_cast<_OutputArray::DepthMask>(0)) const; + void create(int rows, int cols, int type, int i=-1, bool allowTransposed=false, _OutputArray::DepthMask fixedDepthMask=static_cast<_OutputArray::DepthMask>(0)) const; + void create(int dims, const int* size, int type, int i=-1, bool allowTransposed=false, _OutputArray::DepthMask fixedDepthMask=static_cast<_OutputArray::DepthMask>(0)) const; void createSameSize(const _InputArray& arr, int mtype) const; void release() const; void clear() const; @@ -335,6 +369,12 @@ class CV_EXPORTS _OutputArray : public _InputArray void assign(const UMat& u) const; void assign(const Mat& m) const; + + void assign(const std::vector& v) const; + void assign(const std::vector& v) const; + + void move(UMat& u) const; + void move(Mat& m) const; }; @@ -350,7 +390,7 @@ class CV_EXPORTS _InputOutputArray : public _OutputArray _InputOutputArray(cuda::HostMem& cuda_mem); template _InputOutputArray(cudev::GpuMat_<_Tp>& m); template _InputOutputArray(std::vector<_Tp>& vec); - _InputOutputArray(std::vector& vec); + _InputOutputArray(std::vector& vec) = delete; // not supported template _InputOutputArray(std::vector >& vec); template _InputOutputArray(std::vector >& vec); template _InputOutputArray(Mat_<_Tp>& m); @@ -374,8 +414,26 @@ class CV_EXPORTS _InputOutputArray : public _OutputArray template _InputOutputArray(const Matx<_Tp, m, n>& matx); _InputOutputArray(const UMat& m); _InputOutputArray(const std::vector& vec); + + template _InputOutputArray(std::array<_Tp, _Nm>& arr); + template _InputOutputArray(const std::array<_Tp, _Nm>& arr); + template _InputOutputArray(std::array& arr); + template _InputOutputArray(const std::array& arr); + + template static _InputOutputArray rawInOut(std::vector<_Tp>& vec); + template _InputOutputArray rawInOut(std::array<_Tp, _Nm>& arr); + }; +/** Helper to wrap custom types. @see InputArray */ +template static inline _InputArray rawIn(_Tp& v); +/** Helper to wrap custom types. @see InputArray */ +template static inline _OutputArray rawOut(_Tp& v); +/** Helper to wrap custom types. @see InputArray */ +template static inline _InputOutputArray rawInOut(_Tp& v); + +CV__DEBUG_NS_END + typedef const _InputArray& InputArray; typedef InputArray InputArrayOfArrays; typedef const _OutputArray& OutputArray; @@ -415,10 +473,10 @@ class CV_EXPORTS MatAllocator // uchar*& datastart, uchar*& data, size_t* step) = 0; //virtual void deallocate(int* refcount, uchar* datastart, uchar* data) = 0; virtual UMatData* allocate(int dims, const int* sizes, int type, - void* data, size_t* step, int flags, UMatUsageFlags usageFlags) const = 0; - virtual bool allocate(UMatData* data, int accessflags, UMatUsageFlags usageFlags) const = 0; + void* data, size_t* step, AccessFlag flags, UMatUsageFlags usageFlags) const = 0; + virtual bool allocate(UMatData* data, AccessFlag accessflags, UMatUsageFlags usageFlags) const = 0; virtual void deallocate(UMatData* data) const = 0; - virtual void map(UMatData* data, int accessflags) const; + virtual void map(UMatData* data, AccessFlag accessflags) const; virtual void unmap(UMatData* data) const; virtual void download(UMatData* data, void* dst, int dims, const size_t sz[], const size_t srcofs[], const size_t srcstep[], @@ -471,9 +529,11 @@ template class MatCommaInitializer_ // it should be explicitly initialized using init(). struct CV_EXPORTS UMatData { - enum { COPY_ON_MAP=1, HOST_COPY_OBSOLETE=2, + enum MemoryFlag { COPY_ON_MAP=1, HOST_COPY_OBSOLETE=2, DEVICE_COPY_OBSOLETE=4, TEMP_UMAT=8, TEMP_COPIED_UMAT=24, - USER_ALLOCATED=32, DEVICE_MEM_MAPPED=64}; + USER_ALLOCATED=32, DEVICE_MEM_MAPPED=64, + ASYNC_CLEANUP=128 + }; UMatData(const MatAllocator* allocator); ~UMatData(); @@ -499,30 +559,24 @@ struct CV_EXPORTS UMatData uchar* origdata; size_t size; - int flags; + UMatData::MemoryFlag flags; void* handle; void* userdata; int allocatorFlags_; int mapcount; UMatData* originalUMatData; }; - - -struct CV_EXPORTS UMatDataAutoLock -{ - explicit UMatDataAutoLock(UMatData* u); - ~UMatDataAutoLock(); - UMatData* u; -}; +CV_ENUM_FLAGS(UMatData::MemoryFlag) struct CV_EXPORTS MatSize { explicit MatSize(int* _p); + int dims() const; Size operator()() const; const int& operator[](int i) const; int& operator[](int i); - operator const int*() const; + operator const int*() const; // TODO OpenCV 4.0: drop this bool operator == (const MatSize& sz) const; bool operator != (const MatSize& sz) const; @@ -544,11 +598,11 @@ struct CV_EXPORTS MatStep MatStep& operator = (const MatStep&); }; -/** @example cout_mat.cpp +/** @example samples/cpp/cout_mat.cpp An example demonstrating the serial out capabilities of cv::Mat */ - /** @brief n-dimensional dense array class + /** @brief n-dimensional dense array class \anchor CVMat_Details The class Mat represents an n-dimensional dense numerical single-channel or multi-channel array. It can be used to store real or complex-valued vectors and matrices, grayscale or color images, voxel @@ -563,12 +617,11 @@ Note that `M.step[i] >= M.step[i+1]` (in fact, `M.step[i] >= M.step[i+1]*M.size[ that 2-dimensional matrices are stored row-by-row, 3-dimensional matrices are stored plane-by-plane, and so on. M.step[M.dims-1] is minimal and always equal to the element size M.elemSize() . -So, the data layout in Mat is fully compatible with CvMat, IplImage, and CvMatND types from OpenCV -1.x. It is also compatible with the majority of dense array types from the standard toolkits and -SDKs, such as Numpy (ndarray), Win32 (independent device bitmaps), and others, that is, with any -array that uses *steps* (or *strides*) to compute the position of a pixel. Due to this -compatibility, it is possible to make a Mat header for user-allocated data and process it in-place -using OpenCV functions. +So, the data layout in Mat is compatible with the majority of dense array types from the standard +toolkits and SDKs, such as Numpy (ndarray), Win32 (independent device bitmaps), and others, +that is, with any array that uses *steps* (or *strides*) to compute the position of a pixel. +Due to this compatibility, it is possible to make a Mat header for user-allocated data and process +it in-place using OpenCV functions. There are many different ways to create a Mat object. The most popular options are listed below: @@ -653,14 +706,10 @@ sub-matrices. Mat M = Mat(3, 3, CV_64F, m).inv(); @endcode . - Partial yet very common cases of this *user-allocated data* case are conversions from CvMat and - IplImage to Mat. For this purpose, there is function cv::cvarrToMat taking pointers to CvMat or - IplImage and the optional flag indicating whether to copy the data or not. - @snippet samples/cpp/image.cpp iplimage - Use MATLAB-style array initializers, zeros(), ones(), eye(), for example: @code - // create a double-precision identity martix and add it to M. + // create a double-precision identity matrix and add it to M. M += Mat::eye(M.rows, M.cols, CV_64F); @endcode @@ -693,7 +742,7 @@ If you need to process a whole row of a 2D array, the most efficient way is to g the row first, and then just use the plain C operator [] : @code // compute sum of positive matrix elements - // (assuming that M isa double-precision matrix) + // (assuming that M is a double-precision matrix) double sum=0; for(int i = 0; i < M.rows; i++) { @@ -736,6 +785,8 @@ Finally, there are STL-style iterators that are smart enough to skip gaps betwee @endcode The matrix iterators are random-access iterators, so they can be passed to any STL algorithm, including std::sort(). + +@note Matrix Expressions and arithmetic see MatExpr */ class CV_EXPORTS Mat { @@ -794,6 +845,13 @@ class CV_EXPORTS Mat */ Mat(int ndims, const int* sizes, int type); + /** @overload + @param sizes Array of integers specifying an n-dimensional array shape. + @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + */ + Mat(const std::vector& sizes, int type); + /** @overload @param ndims Array dimensionality. @param sizes Array of integers specifying an n-dimensional array shape. @@ -805,6 +863,17 @@ class CV_EXPORTS Mat */ Mat(int ndims, const int* sizes, int type, const Scalar& s); + /** @overload + @param sizes Array of integers specifying an n-dimensional array shape. + @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + @param s An optional value to initialize each matrix element with. To set all the matrix elements to + the particular value after the construction, use the assignment operator + Mat::operator=(const Scalar& value) . + */ + Mat(const std::vector& sizes, int type, const Scalar& s); + + /** @overload @param m Array that (as a whole or partly) is assigned to the constructed matrix. No data is copied by these constructors. Instead, the header pointing to m data or its sub-array is constructed and @@ -861,6 +930,20 @@ class CV_EXPORTS Mat */ Mat(int ndims, const int* sizes, int type, void* data, const size_t* steps=0); + /** @overload + @param sizes Array of integers specifying an n-dimensional array shape. + @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + @param data Pointer to the user data. Matrix constructors that take data and step parameters do not + allocate matrix data. Instead, they just initialize the matrix header that points to the specified + data, which means that no data is copied. This operation is very efficient and can be used to + process external data using OpenCV functions. The external data is not automatically deallocated, so + you should take care of it. + @param steps Array of ndims-1 steps in case of a multi-dimensional array (the last step is always + set to the element size). If not specified, the matrix is assumed to be continuous. + */ + Mat(const std::vector& sizes, int type, void* data, const size_t* steps=0); + /** @overload @param m Array that (as a whole or partly) is assigned to the constructed matrix. No data is copied by these constructors. Instead, the header pointing to m data or its sub-array is constructed and @@ -893,6 +976,16 @@ class CV_EXPORTS Mat */ Mat(const Mat& m, const Range* ranges); + /** @overload + @param m Array that (as a whole or partly) is assigned to the constructed matrix. No data is copied + by these constructors. Instead, the header pointing to m data or its sub-array is constructed and + associated with it. The reference counter, if any, is incremented. So, when you modify the matrix + formed using such a constructor, you also modify the corresponding elements of m . If you want to + have an independent copy of the sub-array, use Mat::clone() . + @param ranges Array of selected ranges of m along each dimensionality. + */ + Mat(const Mat& m, const std::vector& ranges); + /** @overload @param vec STL vector whose elements form the matrix. The matrix has a single column and the number of rows equal to the number of vector elements. Type of the matrix matches the type of vector @@ -911,6 +1004,19 @@ class CV_EXPORTS Mat */ template explicit Mat(const std::vector<_Tp>& vec, bool copyData=false); + /** @overload + */ + template::value>::type> + explicit Mat(const std::initializer_list<_Tp> list); + + /** @overload + */ + template explicit Mat(const std::initializer_list sizes, const std::initializer_list<_Tp> list); + + /** @overload + */ + template explicit Mat(const std::array<_Tp, _Nm>& arr, bool copyData=false); + /** @overload */ template explicit Mat(const Vec<_Tp, n>& vec, bool copyData=true); @@ -957,7 +1063,7 @@ class CV_EXPORTS Mat Mat& operator = (const MatExpr& expr); //! retrieve UMat from Mat - UMat getUMat(int accessFlags, UMatUsageFlags usageFlags = USAGE_DEFAULT) const; + UMat getUMat(AccessFlag accessFlags, UMatUsageFlags usageFlags = USAGE_DEFAULT) const; /** @brief Creates a matrix header for the specified matrix row. @@ -1037,18 +1143,40 @@ class CV_EXPORTS Mat single-column matrix. Similarly to Mat::row and Mat::col, this is an O(1) operation. @param d index of the diagonal, with the following values: - `d=0` is the main diagonal. - - `d>0` is a diagonal from the lower half. For example, d=1 means the diagonal is set + - `d<0` is a diagonal from the lower half. For example, d=-1 means the diagonal is set immediately below the main one. - - `d<0` is a diagonal from the upper half. For example, d=-1 means the diagonal is set + - `d>0` is a diagonal from the upper half. For example, d=1 means the diagonal is set immediately above the main one. + For example: + @code + Mat m = (Mat_(3,3) << + 1,2,3, + 4,5,6, + 7,8,9); + Mat d0 = m.diag(0); + Mat d1 = m.diag(1); + Mat d_1 = m.diag(-1); + @endcode + The resulting matrices are + @code + d0 = + [1; + 5; + 9] + d1 = + [2; + 6] + d_1 = + [4; + 8] + @endcode */ Mat diag(int d=0) const; /** @brief creates a diagonal matrix - The method makes a new header for the specified matrix diagonal. The new matrix is represented as a - single-column matrix. Similarly to Mat::row and Mat::col, this is an O(1) operation. - @param d Single-column matrix that forms a diagonal matrix + The method creates a square diagonal matrix from specified main diagonal. + @param d One-dimensional matrix that represents the main diagonal. */ static Mat diag(const Mat& d); @@ -1057,7 +1185,7 @@ class CV_EXPORTS Mat The method creates a full copy of the array. The original step[] is not taken into account. So, the array copy is a continuous array occupying total()*elemSize() bytes. */ - Mat clone() const; + Mat clone() const CV_NODISCARD; /** @brief Copies the matrix to another one. @@ -1079,8 +1207,8 @@ class CV_EXPORTS Mat /** @overload @param m Destination matrix. If it does not have a proper size or type before the operation, it is reallocated. - @param mask Operation mask. Its non-zero elements indicate which matrix elements need to be copied. - The mask has to be of type CV_8U and can have 1 or multiple channels. + @param mask Operation mask of the same size as \*this. Its non-zero elements indicate which matrix + elements need to be copied. The mask has to be of type CV_8U and can have 1 or multiple channels. */ void copyTo( OutputArray m, InputArray mask ) const; @@ -1116,7 +1244,8 @@ class CV_EXPORTS Mat This is an advanced variant of the Mat::operator=(const Scalar& s) operator. @param value Assigned scalar converted to the actual array type. - @param mask Operation mask of the same size as \*this. + @param mask Operation mask of the same size as \*this. Its non-zero elements indicate which matrix + elements need to be copied. The mask has to be of type CV_8U and can have 1 or multiple channels */ Mat& setTo(InputArray value, InputArray mask=noArray()); @@ -1149,6 +1278,9 @@ class CV_EXPORTS Mat /** @overload */ Mat reshape(int cn, int newndims, const int* newsz) const; + /** @overload */ + Mat reshape(int cn, const std::vector& newshape) const; + /** @brief Transposes a matrix. The method performs matrix transposition by means of matrix expressions. It does not perform the @@ -1206,7 +1338,7 @@ class CV_EXPORTS Mat /** @brief Returns a zero array of the specified size and type. The method returns a Matlab-style zero array initializer. It can be used to quickly form a constant - array as a function parameter, part of a matrix expression, or as a matrix initializer. : + array as a function parameter, part of a matrix expression, or as a matrix initializer: @code Mat A; A = Mat::zeros(3, 3, CV_32F); @@ -1242,6 +1374,8 @@ class CV_EXPORTS Mat The above operation does not form a 100x100 matrix of 1's and then multiply it by 3. Instead, it just remembers the scale factor (3 in this case) and use it when actually invoking the matrix initializer. + @note In case of multi-channels type, only the first channel will be initialized with 1's, the + others will be set to 0's. @param rows Number of rows. @param cols Number of columns. @param type Created matrix type. @@ -1269,6 +1403,8 @@ class CV_EXPORTS Mat // make a 4x4 diagonal matrix with 0.1's on the diagonal. Mat A = Mat::eye(4, 4, CV_32F)*0.1; @endcode + @note In case of multi-channels type, identity matrix will be initialized only for the first channel, + the others will be set to 0's @param rows Number of rows. @param cols Number of columns. @param type Created matrix type. @@ -1329,6 +1465,12 @@ class CV_EXPORTS Mat */ void create(int ndims, const int* sizes, int type); + /** @overload + @param sizes Array of integers specifying a new array shape. + @param type New matrix type. + */ + void create(const std::vector& sizes, int type); + /** @brief Increments the reference counter. The method increments the reference counter associated with the matrix data. If the matrix header @@ -1355,7 +1497,7 @@ class CV_EXPORTS Mat */ void release(); - //! deallocates the matrix data + //! internal use function, consider to use 'release' method instead; deallocates the matrix data void deallocate(); //! internal use function; properly re-allocates _size, _step arrays void copySize(const Mat& m); @@ -1369,6 +1511,14 @@ class CV_EXPORTS Mat */ void reserve(size_t sz); + /** @brief Reserves space for the certain number of bytes. + + The method reserves space for sz bytes. If the matrix already has enough space to store sz bytes, + nothing happens. If matrix has to be reallocated its previous content could be lost. + @param sz Number of bytes. + */ + void reserveBuffer(size_t sz); + /** @brief Changes the number of matrix rows. The methods change the number of matrix rows. If the matrix is reallocated, the first @@ -1401,6 +1551,11 @@ class CV_EXPORTS Mat */ template void push_back(const Mat_<_Tp>& elem); + /** @overload + @param elem Added element(s). + */ + template void push_back(const std::vector<_Tp>& elem); + /** @overload @param m Added line(s). */ @@ -1479,17 +1634,17 @@ class CV_EXPORTS Mat */ Mat operator()( const Range* ranges ) const; - // //! converts header to CvMat; no data is copied - // operator CvMat() const; - // //! converts header to CvMatND; no data is copied - // operator CvMatND() const; - // //! converts header to IplImage; no data is copied - // operator IplImage() const; + /** @overload + @param ranges Array of selected ranges along each array dimension. + */ + Mat operator()(const std::vector& ranges) const; template operator std::vector<_Tp>() const; template operator Vec<_Tp, n>() const; template operator Matx<_Tp, m, n>() const; + template operator std::array<_Tp, _Nm>() const; + /** @brief Reports whether the matrix is continuous or not. The method returns true if the matrix elements are stored continuously without gaps at the end of @@ -1522,7 +1677,7 @@ class CV_EXPORTS Mat inv_scale = 1.f/alpha_scale; CV_Assert( src1.type() == src2.type() && - src1.type() == CV_MAKETYPE(DataType::depth, 4) && + src1.type() == CV_MAKETYPE(traits::Depth::value, 4) && src1.size() == src2.size()); Size size = src1.size(); dst.create(size, src1.type()); @@ -1632,7 +1787,33 @@ class CV_EXPORTS Mat */ size_t total() const; - //! returns N if the matrix is 1-channel (N x ptdim) or ptdim-channel (1 x N) or (N x 1); negative number otherwise + /** @brief Returns the total number of array elements. + + The method returns the number of elements within a certain sub-array slice with startDim <= dim < endDim + */ + size_t total(int startDim, int endDim=INT_MAX) const; + + /** + * @param elemChannels Number of channels or number of columns the matrix should have. + * For a 2-D matrix, when the matrix has only 1 column, then it should have + * elemChannels channels; When the matrix has only 1 channel, + * then it should have elemChannels columns. + * For a 3-D matrix, it should have only one channel. Furthermore, + * if the number of planes is not one, then the number of rows + * within every plane has to be 1; if the number of rows within + * every plane is not 1, then the number of planes has to be 1. + * @param depth The depth the matrix should have. Set it to -1 when any depth is fine. + * @param requireContinuous Set it to true to require the matrix to be continuous + * @return -1 if the requirement is not satisfied. + * Otherwise, it returns the number of elements in the matrix. Note + * that an element may have multiple channels. + * + * The following code demonstrates its usage for a 2-d matrix: + * @snippet snippets/core_mat_checkVector.cpp example-2d + * + * The following code demonstrates its usage for a 3-d matrix: + * @snippet snippets/core_mat_checkVector.cpp example-3d + */ int checkVector(int elemChannels, int depth=-1, bool requireContinuous=true) const; /** @brief Returns a pointer to the specified matrix row. @@ -1645,10 +1826,16 @@ class CV_EXPORTS Mat /** @overload */ const uchar* ptr(int i0=0) const; - /** @overload */ - uchar* ptr(int i0, int i1); - /** @overload */ - const uchar* ptr(int i0, int i1) const; + /** @overload + @param row Index along the dimension 0 + @param col Index along the dimension 1 + */ + uchar* ptr(int row, int col); + /** @overload + @param row Index along the dimension 0 + @param col Index along the dimension 1 + */ + const uchar* ptr(int row, int col) const; /** @overload */ uchar* ptr(int i0, int i1, int i2); @@ -1668,10 +1855,16 @@ class CV_EXPORTS Mat template _Tp* ptr(int i0=0); /** @overload */ template const _Tp* ptr(int i0=0) const; - /** @overload */ - template _Tp* ptr(int i0, int i1); - /** @overload */ - template const _Tp* ptr(int i0, int i1) const; + /** @overload + @param row Index along the dimension 0 + @param col Index along the dimension 1 + */ + template _Tp* ptr(int row, int col); + /** @overload + @param row Index along the dimension 0 + @param col Index along the dimension 1 + */ + template const _Tp* ptr(int row, int col) const; /** @overload */ template _Tp* ptr(int i0, int i1, int i2); /** @overload */ @@ -1721,15 +1914,15 @@ class CV_EXPORTS Mat */ template const _Tp& at(int i0=0) const; /** @overload - @param i0 Index along the dimension 0 - @param i1 Index along the dimension 1 + @param row Index along the dimension 0 + @param col Index along the dimension 1 */ - template _Tp& at(int i0, int i1); + template _Tp& at(int row, int col); /** @overload - @param i0 Index along the dimension 0 - @param i1 Index along the dimension 1 + @param row Index along the dimension 0 + @param col Index along the dimension 1 */ - template const _Tp& at(int i0, int i1) const; + template const _Tp& at(int row, int col) const; /** @overload @param i0 Index along the dimension 0 @@ -1784,7 +1977,7 @@ class CV_EXPORTS Mat inv_scale = 1.f/alpha_scale; CV_Assert( src1.type() == src2.type() && - src1.type() == DataType::type && + src1.type() == traits::Type::value && src1.size() == src2.size()); Size size = src1.size(); dst.create(size, src1.type()); @@ -1816,19 +2009,18 @@ class CV_EXPORTS Mat template MatIterator_<_Tp> end(); template MatConstIterator_<_Tp> end() const; - /** @brief Invoke with arguments functor, and runs the functor over all matrix element. + /** @brief Runs the given functor over all matrix elements in parallel. - The methods runs operation in parallel. Operation is passed by arguments. Operation have to be a - function pointer, a function object or a lambda(C++11). + The operation passed as argument has to be a function pointer, a function object or a lambda(C++11). - All of below operation is equal. Put 0xFF to first channel of all matrix elements: + Example 1. All of the operations below put 0xFF the first channel of all matrix elements: @code Mat image(1920, 1080, CV_8UC3); typedef cv::Point3_ Pixel; // first. raw pointer access. for (int r = 0; r < image.rows; ++r) { - Pixel* ptr = image.ptr(0, r); + Pixel* ptr = image.ptr(r, 0); const Pixel* ptr_end = ptr + image.cols; for (; ptr != ptr_end; ++ptr) { ptr->x = 255; @@ -1853,18 +2045,18 @@ class CV_EXPORTS Mat p.x = 255; }); @endcode - position parameter is index of current pixel: + Example 2. Using the pixel's position: @code - // Creating 3D matrix (255 x 255 x 255) typed uint8_t, - // and initialize all elements by the value which equals elements position. - // i.e. pixels (x,y,z) = (1,2,3) is (b,g,r) = (1,2,3). + // Creating 3D matrix (255 x 255 x 255) typed uint8_t + // and initialize all elements by the value which equals elements position. + // i.e. pixels (x,y,z) = (1,2,3) is (b,g,r) = (1,2,3). int sizes[] = { 255, 255, 255 }; typedef cv::Point3_ Pixel; Mat_ image = Mat::zeros(3, sizes, CV_8UC3); - image.forEachWithPosition([&](Pixel& pixel, const int position[]) -> void{ + image.forEach([&](Pixel& pixel, const int position[]) -> void { pixel.x = position[0]; pixel.y = position[1]; pixel.z = position[2]; @@ -1875,10 +2067,8 @@ class CV_EXPORTS Mat /** @overload */ template void forEach(const Functor& operation) const; -#ifdef CV_CXX_MOVE_SEMANTICS Mat(Mat&& m); Mat& operator = (Mat&& m); -#endif enum { MAGIC_VAL = 0x42FF0000, AUTO_STEP = 0, CONTINUOUS_FLAG = CV_MAT_CONT_FLAG, SUBMATRIX_FLAG = CV_SUBMAT_FLAG }; enum { MAGIC_MASK = 0xFFFF0000, TYPE_MASK = 0x00000FFF, DEPTH_MASK = 7 }; @@ -1909,6 +2099,9 @@ class CV_EXPORTS Mat static MatAllocator* getDefaultAllocator(); static void setDefaultAllocator(MatAllocator* allocator); + //! internal use method: updates the continuity flag + void updateContinuityFlag(); + //! interaction with UMat UMatData* u; @@ -1924,7 +2117,7 @@ class CV_EXPORTS Mat /** @brief Template matrix class derived from Mat -@code +@code{.cpp} template class Mat_ : public Mat { public: @@ -1936,7 +2129,7 @@ class CV_EXPORTS Mat The class `Mat_<_Tp>` is a *thin* template wrapper on top of the Mat class. It does not have any extra data fields. Nor this class nor Mat has any virtual methods. Thus, references or pointers to these two classes can be freely but carefully converted one to another. For example: -@code +@code{.cpp} // create a 100x100 8-bit matrix Mat M(100,100,CV_8U); // this will be compiled fine. no any data conversion will be done. @@ -1948,7 +2141,7 @@ While Mat is sufficient in most cases, Mat_ can be more convenient if you use a access operations and if you know matrix type at the compilation time. Note that `Mat::at(int y,int x)` and `Mat_::operator()(int y,int x)` do absolutely the same and run at the same speed, but the latter is certainly shorter: -@code +@code{.cpp} Mat_ M(20,20); for(int i = 0; i < M.rows; i++) for(int j = 0; j < M.cols; j++) @@ -1958,7 +2151,7 @@ and run at the same speed, but the latter is certainly shorter: cout << E.at(0,0)/E.at(M.rows-1,0); @endcode To use Mat_ for multi-channel images/matrices, pass Vec as a Mat_ parameter: -@code +@code{.cpp} // allocate a 320x240 color image and fill it with green (in RGB space) Mat_ img(240, 320, Vec3b(0,255,0)); // now draw a diagonal white line @@ -1968,6 +2161,17 @@ To use Mat_ for multi-channel images/matrices, pass Vec as a Mat_ parameter: for(int i = 0; i < img.rows; i++) for(int j = 0; j < img.cols; j++) img(i,j)[2] ^= (uchar)(i ^ j); +@endcode +Mat_ is fully compatible with C++11 range-based for loop. For example such loop +can be used to safely apply look-up table: +@code{.cpp} +void applyTable(Mat_& I, const uchar* const table) +{ + for(auto& pixel : I) + { + pixel = table[pixel]; + } +} @endcode */ template class Mat_ : public Mat @@ -1992,7 +2196,7 @@ template class Mat_ : public Mat Mat_(int _ndims, const int* _sizes); //! n-dim array constructor that sets each matrix element to specified value Mat_(int _ndims, const int* _sizes, const _Tp& value); - //! copy/conversion contructor. If m is of different type, it's converted + //! copy/conversion constructor. If m is of different type, it's converted Mat_(const Mat& m); //! copy constructor Mat_(const Mat_& m); @@ -2006,6 +2210,8 @@ template class Mat_ : public Mat Mat_(const Mat_& m, const Rect& roi); //! selects a submatrix, n-dim version Mat_(const Mat_& m, const Range* ranges); + //! selects a submatrix, n-dim version + Mat_(const Mat_& m, const std::vector& ranges); //! from a matrix expression explicit Mat_(const MatExpr& e); //! makes a matrix out of Vec, std::vector, Point_ or Point3_. The matrix will have a single column @@ -2016,6 +2222,11 @@ template class Mat_ : public Mat explicit Mat_(const Point3_::channel_type>& pt, bool copyData=true); explicit Mat_(const MatCommaInitializer_<_Tp>& commaInitializer); + Mat_(std::initializer_list<_Tp> values); + explicit Mat_(const std::initializer_list sizes, const std::initializer_list<_Tp> values); + + template explicit Mat_(const std::array<_Tp, _Nm>& arr, bool copyData=false); + Mat_& operator = (const Mat& m); Mat_& operator = (const Mat_& m); //! set all the elements to s. @@ -2040,6 +2251,8 @@ template class Mat_ : public Mat void create(Size _size); //! equivalent to Mat::create(_ndims, _sizes, DatType<_Tp>::type) void create(int _ndims, const int* _sizes); + //! equivalent to Mat::release() + void release(); //! cross-product Mat_ cross(const Mat_& m) const; //! data type conversion @@ -2048,7 +2261,7 @@ template class Mat_ : public Mat Mat_ row(int y) const; Mat_ col(int x) const; Mat_ diag(int d=0) const; - Mat_ clone() const; + Mat_ clone() const CV_NODISCARD; //! overridden forms of Mat::elemSize() etc. size_t elemSize() const; @@ -2070,11 +2283,12 @@ template class Mat_ : public Mat static MatExpr eye(int rows, int cols); static MatExpr eye(Size size); - //! some more overriden methods + //! some more overridden methods Mat_& adjustROI( int dtop, int dbottom, int dleft, int dright ); Mat_ operator()( const Range& rowRange, const Range& colRange ) const; Mat_ operator()( const Rect& roi ) const; Mat_ operator()( const Range* ranges ) const; + Mat_ operator()(const std::vector& ranges) const; //! more convenient forms of row and element access operators _Tp* operator [](int y); @@ -2095,9 +2309,9 @@ template class Mat_ : public Mat //! returns read-only reference to the specified element (1D case) const _Tp& operator ()(int idx0) const; //! returns reference to the specified element (2D case) - _Tp& operator ()(int idx0, int idx1); + _Tp& operator ()(int row, int col); //! returns read-only reference to the specified element (2D case) - const _Tp& operator ()(int idx0, int idx1) const; + const _Tp& operator ()(int row, int col) const; //! returns reference to the specified element (3D case) _Tp& operator ()(int idx0, int idx1, int idx2); //! returns read-only reference to the specified element (3D case) @@ -2108,12 +2322,15 @@ template class Mat_ : public Mat //! conversion to vector. operator std::vector<_Tp>() const; + + //! conversion to array. + template operator std::array<_Tp, _Nm>() const; + //! conversion to Vec template operator Vec::channel_type, n>() const; //! conversion to Matx template operator Matx::channel_type, m, n>() const; -#ifdef CV_CXX_MOVE_SEMANTICS Mat_(Mat_&& m); Mat_& operator = (Mat_&& m); @@ -2121,7 +2338,6 @@ template class Mat_ : public Mat Mat_& operator = (Mat&& m); Mat_(MatExpr&& e); -#endif }; typedef Mat_ Mat1b; @@ -2164,7 +2380,7 @@ class CV_EXPORTS UMat // (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.) UMat(int rows, int cols, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT); UMat(Size size, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT); - //! constucts 2D matrix and fills it with the specified value _s. + //! constructs 2D matrix and fills it with the specified value _s. UMat(int rows, int cols, int type, const Scalar& s, UMatUsageFlags usageFlags = USAGE_DEFAULT); UMat(Size size, int type, const Scalar& s, UMatUsageFlags usageFlags = USAGE_DEFAULT); @@ -2179,8 +2395,10 @@ class CV_EXPORTS UMat UMat(const UMat& m, const Range& rowRange, const Range& colRange=Range::all()); UMat(const UMat& m, const Rect& roi); UMat(const UMat& m, const Range* ranges); + UMat(const UMat& m, const std::vector& ranges); //! builds matrix from std::vector with or without copying the data template explicit UMat(const std::vector<_Tp>& vec, bool copyData=false); + //! builds matrix from cv::Vec; the data is copied by default template explicit UMat(const Vec<_Tp, n>& vec, bool copyData=true); //! builds matrix from cv::Matx; the data is copied by default @@ -2197,7 +2415,7 @@ class CV_EXPORTS UMat //! assignment operators UMat& operator = (const UMat& m); - Mat getMat(int flags) const; + Mat getMat(AccessFlag flags) const; //! returns a new matrix header for the specified row UMat row(int y) const; @@ -2210,21 +2428,21 @@ class CV_EXPORTS UMat UMat colRange(int startcol, int endcol) const; UMat colRange(const Range& r) const; //! ... for the specified diagonal - // (d=0 - the main diagonal, - // >0 - a diagonal from the lower half, - // <0 - a diagonal from the upper half) + //! (d=0 - the main diagonal, + //! >0 - a diagonal from the upper half, + //! <0 - a diagonal from the lower half) UMat diag(int d=0) const; //! constructs a square diagonal matrix which main diagonal is vector "d" static UMat diag(const UMat& d); //! returns deep copy of the matrix, i.e. the data is copied - UMat clone() const; + UMat clone() const CV_NODISCARD; //! copies the matrix content to "m". // It calls m.create(this->size(), this->type()). void copyTo( OutputArray m ) const; //! copies those matrix elements to "m" that are marked with non-zero mask elements. void copyTo( OutputArray m, InputArray mask ) const; - //! converts matrix to another datatype with optional scalng. See cvConvertScale. + //! converts matrix to another datatype with optional scaling. See cvConvertScale. void convertTo( OutputArray m, int rtype, double alpha=1, double beta=0 ) const; void assignTo( UMat& m, int type=-1 ) const; @@ -2263,6 +2481,7 @@ class CV_EXPORTS UMat void create(int rows, int cols, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT); void create(Size size, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT); void create(int ndims, const int* sizes, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT); + void create(const std::vector& sizes, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT); //! increases the reference counter; use with care to avoid memleaks void addref(); @@ -2284,6 +2503,7 @@ class CV_EXPORTS UMat UMat operator()( Range rowRange, Range colRange ) const; UMat operator()( const Rect& roi ) const; UMat operator()( const Range* ranges ) const; + UMat operator()(const std::vector& ranges) const; //! returns true iff the matrix data is continuous // (i.e. when there are no gaps between successive rows). @@ -2314,12 +2534,14 @@ class CV_EXPORTS UMat //! returns N if the matrix is 1-channel (N x ptdim) or ptdim-channel (1 x N) or (N x 1); negative number otherwise int checkVector(int elemChannels, int depth=-1, bool requireContinuous=true) const; -#ifdef CV_CXX_MOVE_SEMANTICS UMat(UMat&& m); UMat& operator = (UMat&& m); -#endif - void* handle(int accessFlags) const; + /*! Returns the OpenCL buffer handle on which UMat operates on. + The UMat instance should be kept alive during the use of the handle to prevent the buffer to be + returned to the OpenCV buffer pool. + */ + void* handle(AccessFlag accessFlags) const; void ndoffset(size_t* ofs) const; enum { MAGIC_VAL = 0x42FF0000, AUTO_STEP = 0, CONTINUOUS_FLAG = CV_MAT_CONT_FLAG, SUBMATRIX_FLAG = CV_SUBMAT_FLAG }; @@ -2343,6 +2565,9 @@ class CV_EXPORTS UMat //! and the standard allocator static MatAllocator* getStdAllocator(); + //! internal use method: updates the continuity flag + void updateContinuityFlag(); + // black-box container of UMat data UMatData* u; @@ -2370,15 +2595,16 @@ Elements can be accessed using the following methods: SparseMat::find), for example: @code const int dims = 5; - int size[] = {10, 10, 10, 10, 10}; + int size[5] = {10, 10, 10, 10, 10}; SparseMat sparse_mat(dims, size, CV_32F); for(int i = 0; i < 1000; i++) { int idx[dims]; for(int k = 0; k < dims; k++) - idx[k] = rand() + idx[k] = rand() % size[k]; sparse_mat.ref(idx) += 1.f; } + cout << "nnz = " << sparse_mat.nzcount() << endl; @endcode - Sparse matrix iterators. They are similar to MatIterator but different from NAryMatIterator. That is, the iteration loop is familiar to STL users: @@ -2503,7 +2729,7 @@ class CV_EXPORTS SparseMat SparseMat& operator = (const Mat& m); //! creates full copy of the matrix - SparseMat clone() const; + SparseMat clone() const CV_NODISCARD; //! copies all the data to the destination matrix. All the previous content of m is erased void copyTo( SparseMat& m ) const; @@ -2515,11 +2741,11 @@ class CV_EXPORTS SparseMat /*! @param [out] m - output matrix; if it does not have a proper size or type before the operation, it is reallocated - @param [in] rtype – desired output matrix type or, rather, the depth since the number of channels + @param [in] rtype - desired output matrix type or, rather, the depth since the number of channels are the same as the input has; if rtype is negative, the output matrix will have the same type as the input. - @param [in] alpha – optional scale factor - @param [in] beta – optional delta added to the scaled values + @param [in] alpha - optional scale factor + @param [in] beta - optional delta added to the scaled values */ void convertTo( Mat& m, int rtype, double alpha=1, double beta=0 ) const; @@ -2599,7 +2825,7 @@ class CV_EXPORTS SparseMat `ref<_Tp>(i0,...[,hashval])` is equivalent to `*(_Tp*)ptr(i0,...,true[,hashval])`. The methods always return a valid reference. - If the element did not exist, it is created and initialiazed with 0. + If the element did not exist, it is created and initialized with 0. */ //! returns reference to the specified element (1D case) template _Tp& ref(int i0, size_t* hashval=0); @@ -2722,7 +2948,7 @@ template class SparseMat_ : public SparseMat //! the default constructor SparseMat_(); - //! the full constructor equivelent to SparseMat(dims, _sizes, DataType<_Tp>::type) + //! the full constructor equivalent to SparseMat(dims, _sizes, DataType<_Tp>::type) SparseMat_(int dims, const int* _sizes); //! the copy constructor. If DataType<_Tp>.type != m.type(), the m elements are converted SparseMat_(const SparseMat& m); @@ -2740,7 +2966,7 @@ template class SparseMat_ : public SparseMat SparseMat_& operator = (const Mat& m); //! makes full copy of the matrix. All the elements are duplicated - SparseMat_ clone() const; + SparseMat_ clone() const CV_NODISCARD; //! equivalent to cv::SparseMat::create(dims, _sizes, DataType<_Tp>::type) void create(int dims, const int* _sizes); //! converts sparse matrix to the old-style CvSparseMat. All the elements are copied @@ -2793,9 +3019,7 @@ class CV_EXPORTS MatConstIterator typedef const uchar** pointer; typedef uchar* reference; -#ifndef OPENCV_NOSTL typedef std::random_access_iterator_tag iterator_category; -#endif //! default constructor MatConstIterator(); @@ -2860,9 +3084,7 @@ class MatConstIterator_ : public MatConstIterator typedef const _Tp* pointer; typedef const _Tp& reference; -#ifndef OPENCV_NOSTL typedef std::random_access_iterator_tag iterator_category; -#endif //! default constructor MatConstIterator_(); @@ -2880,9 +3102,9 @@ class MatConstIterator_ : public MatConstIterator //! copy operator MatConstIterator_& operator = (const MatConstIterator_& it); //! returns the current matrix element - _Tp operator *() const; + const _Tp& operator *() const; //! returns the i-th matrix element, relative to the current - _Tp operator [](ptrdiff_t i) const; + const _Tp& operator [](ptrdiff_t i) const; //! shifts the iterator forward by the specified number of elements MatConstIterator_& operator += (ptrdiff_t ofs); @@ -2913,9 +3135,7 @@ class MatIterator_ : public MatConstIterator_<_Tp> typedef _Tp* pointer; typedef _Tp& reference; -#ifndef OPENCV_NOSTL typedef std::random_access_iterator_tag iterator_category; -#endif //! the default constructor MatIterator_(); @@ -3049,9 +3269,7 @@ template class SparseMatConstIterator_ : public SparseMatConstIter { public: -#ifndef OPENCV_NOSTL typedef std::forward_iterator_tag iterator_category; -#endif //! the default constructor SparseMatConstIterator_(); @@ -3085,9 +3303,7 @@ template class SparseMatIterator_ : public SparseMatConstIterator_ { public: -#ifndef OPENCV_NOSTL typedef std::forward_iterator_tag iterator_category; -#endif //! the default constructor SparseMatIterator_(); @@ -3146,21 +3362,29 @@ The example below illustrates how you can compute a normalized and threshold 3D } minProb *= image.rows*image.cols; - Mat plane; - NAryMatIterator it(&hist, &plane, 1); + + // initialize iterator (the style is different from STL). + // after initialization the iterator will contain + // the number of slices or planes the iterator will go through. + // it simultaneously increments iterators for several matrices + // supplied as a null terminated list of pointers + const Mat* arrays[] = {&hist, 0}; + Mat planes[1]; + NAryMatIterator itNAry(arrays, planes, 1); double s = 0; // iterate through the matrix. on each iteration - // it.planes[*] (of type Mat) will be set to the current plane. - for(int p = 0; p < it.nplanes; p++, ++it) + // itNAry.planes[i] (of type Mat) will be set to the current plane + // of the i-th n-dim matrix passed to the iterator constructor. + for(int p = 0; p < itNAry.nplanes; p++, ++itNAry) { - threshold(it.planes[0], it.planes[0], minProb, 0, THRESH_TOZERO); - s += sum(it.planes[0])[0]; + threshold(itNAry.planes[0], itNAry.planes[0], minProb, 0, THRESH_TOZERO); + s += sum(itNAry.planes[0])[0]; } s = 1./s; - it = NAryMatIterator(&hist, &plane, 1); - for(int p = 0; p < it.nplanes; p++, ++it) - it.planes[0] *= s; + itNAry = NAryMatIterator(arrays, planes, 1); + for(int p = 0; p < itNAry.nplanes; p++, ++itNAry) + itNAry.planes[0] *= s; } @endcode */ @@ -3338,6 +3562,10 @@ CV_EXPORTS MatExpr operator + (const Mat& m, const MatExpr& e); CV_EXPORTS MatExpr operator + (const MatExpr& e, const Scalar& s); CV_EXPORTS MatExpr operator + (const Scalar& s, const MatExpr& e); CV_EXPORTS MatExpr operator + (const MatExpr& e1, const MatExpr& e2); +template static inline +MatExpr operator + (const Mat& a, const Matx<_Tp, m, n>& b) { return a + Mat(b); } +template static inline +MatExpr operator + (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) + b; } CV_EXPORTS MatExpr operator - (const Mat& a, const Mat& b); CV_EXPORTS MatExpr operator - (const Mat& a, const Scalar& s); @@ -3347,6 +3575,10 @@ CV_EXPORTS MatExpr operator - (const Mat& m, const MatExpr& e); CV_EXPORTS MatExpr operator - (const MatExpr& e, const Scalar& s); CV_EXPORTS MatExpr operator - (const Scalar& s, const MatExpr& e); CV_EXPORTS MatExpr operator - (const MatExpr& e1, const MatExpr& e2); +template static inline +MatExpr operator - (const Mat& a, const Matx<_Tp, m, n>& b) { return a - Mat(b); } +template static inline +MatExpr operator - (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) - b; } CV_EXPORTS MatExpr operator - (const Mat& m); CV_EXPORTS MatExpr operator - (const MatExpr& e); @@ -3359,6 +3591,10 @@ CV_EXPORTS MatExpr operator * (const Mat& m, const MatExpr& e); CV_EXPORTS MatExpr operator * (const MatExpr& e, double s); CV_EXPORTS MatExpr operator * (double s, const MatExpr& e); CV_EXPORTS MatExpr operator * (const MatExpr& e1, const MatExpr& e2); +template static inline +MatExpr operator * (const Mat& a, const Matx<_Tp, m, n>& b) { return a * Mat(b); } +template static inline +MatExpr operator * (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) * b; } CV_EXPORTS MatExpr operator / (const Mat& a, const Mat& b); CV_EXPORTS MatExpr operator / (const Mat& a, double s); @@ -3368,52 +3604,100 @@ CV_EXPORTS MatExpr operator / (const Mat& m, const MatExpr& e); CV_EXPORTS MatExpr operator / (const MatExpr& e, double s); CV_EXPORTS MatExpr operator / (double s, const MatExpr& e); CV_EXPORTS MatExpr operator / (const MatExpr& e1, const MatExpr& e2); +template static inline +MatExpr operator / (const Mat& a, const Matx<_Tp, m, n>& b) { return a / Mat(b); } +template static inline +MatExpr operator / (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) / b; } CV_EXPORTS MatExpr operator < (const Mat& a, const Mat& b); CV_EXPORTS MatExpr operator < (const Mat& a, double s); CV_EXPORTS MatExpr operator < (double s, const Mat& a); +template static inline +MatExpr operator < (const Mat& a, const Matx<_Tp, m, n>& b) { return a < Mat(b); } +template static inline +MatExpr operator < (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) < b; } CV_EXPORTS MatExpr operator <= (const Mat& a, const Mat& b); CV_EXPORTS MatExpr operator <= (const Mat& a, double s); CV_EXPORTS MatExpr operator <= (double s, const Mat& a); +template static inline +MatExpr operator <= (const Mat& a, const Matx<_Tp, m, n>& b) { return a <= Mat(b); } +template static inline +MatExpr operator <= (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) <= b; } CV_EXPORTS MatExpr operator == (const Mat& a, const Mat& b); CV_EXPORTS MatExpr operator == (const Mat& a, double s); CV_EXPORTS MatExpr operator == (double s, const Mat& a); +template static inline +MatExpr operator == (const Mat& a, const Matx<_Tp, m, n>& b) { return a == Mat(b); } +template static inline +MatExpr operator == (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) == b; } CV_EXPORTS MatExpr operator != (const Mat& a, const Mat& b); CV_EXPORTS MatExpr operator != (const Mat& a, double s); CV_EXPORTS MatExpr operator != (double s, const Mat& a); +template static inline +MatExpr operator != (const Mat& a, const Matx<_Tp, m, n>& b) { return a != Mat(b); } +template static inline +MatExpr operator != (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) != b; } CV_EXPORTS MatExpr operator >= (const Mat& a, const Mat& b); CV_EXPORTS MatExpr operator >= (const Mat& a, double s); CV_EXPORTS MatExpr operator >= (double s, const Mat& a); +template static inline +MatExpr operator >= (const Mat& a, const Matx<_Tp, m, n>& b) { return a >= Mat(b); } +template static inline +MatExpr operator >= (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) >= b; } CV_EXPORTS MatExpr operator > (const Mat& a, const Mat& b); CV_EXPORTS MatExpr operator > (const Mat& a, double s); CV_EXPORTS MatExpr operator > (double s, const Mat& a); +template static inline +MatExpr operator > (const Mat& a, const Matx<_Tp, m, n>& b) { return a > Mat(b); } +template static inline +MatExpr operator > (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) > b; } CV_EXPORTS MatExpr operator & (const Mat& a, const Mat& b); CV_EXPORTS MatExpr operator & (const Mat& a, const Scalar& s); CV_EXPORTS MatExpr operator & (const Scalar& s, const Mat& a); +template static inline +MatExpr operator & (const Mat& a, const Matx<_Tp, m, n>& b) { return a & Mat(b); } +template static inline +MatExpr operator & (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) & b; } CV_EXPORTS MatExpr operator | (const Mat& a, const Mat& b); CV_EXPORTS MatExpr operator | (const Mat& a, const Scalar& s); CV_EXPORTS MatExpr operator | (const Scalar& s, const Mat& a); +template static inline +MatExpr operator | (const Mat& a, const Matx<_Tp, m, n>& b) { return a | Mat(b); } +template static inline +MatExpr operator | (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) | b; } CV_EXPORTS MatExpr operator ^ (const Mat& a, const Mat& b); CV_EXPORTS MatExpr operator ^ (const Mat& a, const Scalar& s); CV_EXPORTS MatExpr operator ^ (const Scalar& s, const Mat& a); +template static inline +MatExpr operator ^ (const Mat& a, const Matx<_Tp, m, n>& b) { return a ^ Mat(b); } +template static inline +MatExpr operator ^ (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) ^ b; } CV_EXPORTS MatExpr operator ~(const Mat& m); CV_EXPORTS MatExpr min(const Mat& a, const Mat& b); CV_EXPORTS MatExpr min(const Mat& a, double s); CV_EXPORTS MatExpr min(double s, const Mat& a); +template static inline +MatExpr min (const Mat& a, const Matx<_Tp, m, n>& b) { return min(a, Mat(b)); } +template static inline +MatExpr min (const Matx<_Tp, m, n>& a, const Mat& b) { return min(Mat(a), b); } CV_EXPORTS MatExpr max(const Mat& a, const Mat& b); CV_EXPORTS MatExpr max(const Mat& a, double s); CV_EXPORTS MatExpr max(double s, const Mat& a); +template static inline +MatExpr max (const Mat& a, const Matx<_Tp, m, n>& b) { return max(a, Mat(b)); } +template static inline +MatExpr max (const Matx<_Tp, m, n>& a, const Mat& b) { return max(Mat(a), b); } /** @brief Calculates an absolute value of each matrix element. @@ -3439,4 +3723,4 @@ CV_EXPORTS MatExpr abs(const MatExpr& e); #include "opencv2/core/mat.inl.hpp" -#endif // __OPENCV_CORE_MAT_HPP__ +#endif // OPENCV_CORE_MAT_HPP diff --git a/IPL/include/opencv/opencv2/core/mat.inl.hpp b/IPL/include/opencv/opencv2/core/mat.inl.hpp index b4b1418..5066fb1 100644 --- a/IPL/include/opencv/opencv2/core/mat.inl.hpp +++ b/IPL/include/opencv/opencv2/core/mat.inl.hpp @@ -42,18 +42,50 @@ // //M*/ -#ifndef __OPENCV_CORE_MATRIX_OPERATIONS_HPP__ -#define __OPENCV_CORE_MATRIX_OPERATIONS_HPP__ +#ifndef OPENCV_CORE_MATRIX_OPERATIONS_HPP +#define OPENCV_CORE_MATRIX_OPERATIONS_HPP #ifndef __cplusplus # error mat.inl.hpp header must be compiled as C++ #endif +#ifdef _MSC_VER +#pragma warning( push ) +#pragma warning( disable: 4127 ) +#endif + +#if defined(CV_SKIP_DISABLE_CLANG_ENUM_WARNINGS) + // nothing +#elif defined(CV_FORCE_DISABLE_CLANG_ENUM_WARNINGS) + #define CV_DISABLE_CLANG_ENUM_WARNINGS +#elif defined(__clang__) && defined(__has_warning) + #if __has_warning("-Wdeprecated-enum-enum-conversion") && __has_warning("-Wdeprecated-anon-enum-enum-conversion") + #define CV_DISABLE_CLANG_ENUM_WARNINGS + #endif +#endif +#ifdef CV_DISABLE_CLANG_ENUM_WARNINGS +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-enum-enum-conversion" +#pragma clang diagnostic ignored "-Wdeprecated-anon-enum-enum-conversion" +#endif + namespace cv { +CV__DEBUG_NS_BEGIN + //! @cond IGNORED +////////////////////////// Custom (raw) type wrapper ////////////////////////// + +template static inline +int rawType() +{ + CV_StaticAssert(sizeof(_Tp) <= CV_CN_MAX, "sizeof(_Tp) is too large"); + const int elemSize = sizeof(_Tp); + return (int)CV_MAKETYPE(CV_8U, elemSize); +} + //////////////////////// Input/Output Arrays //////////////////////// inline void _InputArray::init(int _flags, const void* _obj) @@ -66,7 +98,7 @@ inline void* _InputArray::getObj() const { return obj; } inline int _InputArray::getFlags() const { return flags; } inline Size _InputArray::getSz() const { return sz; } -inline _InputArray::_InputArray() { init(NONE, 0); } +inline _InputArray::_InputArray() { init(0 + NONE, 0); } inline _InputArray::_InputArray(int _flags, void* _obj) { init(_flags, _obj); } inline _InputArray::_InputArray(const Mat& m) { init(MAT+ACCESS_READ, &m); } inline _InputArray::_InputArray(const std::vector& vec) { init(STD_VECTOR_MAT+ACCESS_READ, &vec); } @@ -75,31 +107,39 @@ inline _InputArray::_InputArray(const std::vector& vec) { init(STD_VECTOR_ template inline _InputArray::_InputArray(const std::vector<_Tp>& vec) -{ init(FIXED_TYPE + STD_VECTOR + DataType<_Tp>::type + ACCESS_READ, &vec); } +{ init(FIXED_TYPE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_READ, &vec); } + +template inline +_InputArray::_InputArray(const std::array<_Tp, _Nm>& arr) +{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_READ, arr.data(), Size(1, _Nm)); } + +template inline +_InputArray::_InputArray(const std::array& arr) +{ init(STD_ARRAY_MAT + ACCESS_READ, arr.data(), Size(1, _Nm)); } inline _InputArray::_InputArray(const std::vector& vec) -{ init(FIXED_TYPE + STD_BOOL_VECTOR + DataType::type + ACCESS_READ, &vec); } +{ init(FIXED_TYPE + STD_BOOL_VECTOR + traits::Type::value + ACCESS_READ, &vec); } template inline _InputArray::_InputArray(const std::vector >& vec) -{ init(FIXED_TYPE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_READ, &vec); } +{ init(FIXED_TYPE + STD_VECTOR_VECTOR + traits::Type<_Tp>::value + ACCESS_READ, &vec); } template inline _InputArray::_InputArray(const std::vector >& vec) -{ init(FIXED_TYPE + STD_VECTOR_MAT + DataType<_Tp>::type + ACCESS_READ, &vec); } +{ init(FIXED_TYPE + STD_VECTOR_MAT + traits::Type<_Tp>::value + ACCESS_READ, &vec); } template inline _InputArray::_InputArray(const Matx<_Tp, m, n>& mtx) -{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_READ, &mtx, Size(n, m)); } +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_READ, &mtx, Size(n, m)); } template inline _InputArray::_InputArray(const _Tp* vec, int n) -{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_READ, vec, Size(n, 1)); } +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_READ, vec, Size(n, 1)); } template inline _InputArray::_InputArray(const Mat_<_Tp>& m) -{ init(FIXED_TYPE + MAT + DataType<_Tp>::type + ACCESS_READ, &m); } +{ init(FIXED_TYPE + MAT + traits::Type<_Tp>::value + ACCESS_READ, &m); } inline _InputArray::_InputArray(const double& val) { init(FIXED_TYPE + FIXED_SIZE + MATX + CV_64F + ACCESS_READ, &val, Size(1,1)); } @@ -119,6 +159,25 @@ inline _InputArray::_InputArray(const ogl::Buffer& buf) inline _InputArray::_InputArray(const cuda::HostMem& cuda_mem) { init(CUDA_HOST_MEM + ACCESS_READ, &cuda_mem); } +template inline +_InputArray _InputArray::rawIn(const std::vector<_Tp>& vec) +{ + _InputArray v; + v.flags = _InputArray::FIXED_TYPE + _InputArray::STD_VECTOR + rawType<_Tp>() + ACCESS_READ; + v.obj = (void*)&vec; + return v; +} + +template inline +_InputArray _InputArray::rawIn(const std::array<_Tp, _Nm>& arr) +{ + _InputArray v; + v.flags = FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_READ; + v.obj = (void*)arr.data(); + v.sz = Size(1, _Nm); + return v; +} + inline _InputArray::~_InputArray() {} inline Mat _InputArray::getMat(int i) const @@ -133,69 +192,84 @@ inline bool _InputArray::isUMat() const { return kind() == _InputArray::UMAT; } inline bool _InputArray::isMatVector() const { return kind() == _InputArray::STD_VECTOR_MAT; } inline bool _InputArray::isUMatVector() const { return kind() == _InputArray::STD_VECTOR_UMAT; } inline bool _InputArray::isMatx() const { return kind() == _InputArray::MATX; } -inline bool _InputArray::isVector() const { return kind() == _InputArray::STD_VECTOR || kind() == _InputArray::STD_BOOL_VECTOR; } +inline bool _InputArray::isVector() const { return kind() == _InputArray::STD_VECTOR || + kind() == _InputArray::STD_BOOL_VECTOR || + kind() == _InputArray::STD_ARRAY; } +inline bool _InputArray::isGpuMat() const { return kind() == _InputArray::CUDA_GPU_MAT; } inline bool _InputArray::isGpuMatVector() const { return kind() == _InputArray::STD_VECTOR_CUDA_GPU_MAT; } //////////////////////////////////////////////////////////////////////////////////////// -inline _OutputArray::_OutputArray() { init(ACCESS_WRITE, 0); } -inline _OutputArray::_OutputArray(int _flags, void* _obj) { init(_flags|ACCESS_WRITE, _obj); } +inline _OutputArray::_OutputArray() { init(NONE + ACCESS_WRITE, 0); } +inline _OutputArray::_OutputArray(int _flags, void* _obj) { init(_flags + ACCESS_WRITE, _obj); } inline _OutputArray::_OutputArray(Mat& m) { init(MAT+ACCESS_WRITE, &m); } -inline _OutputArray::_OutputArray(std::vector& vec) { init(STD_VECTOR_MAT+ACCESS_WRITE, &vec); } -inline _OutputArray::_OutputArray(UMat& m) { init(UMAT+ACCESS_WRITE, &m); } -inline _OutputArray::_OutputArray(std::vector& vec) { init(STD_VECTOR_UMAT+ACCESS_WRITE, &vec); } +inline _OutputArray::_OutputArray(std::vector& vec) { init(STD_VECTOR_MAT + ACCESS_WRITE, &vec); } +inline _OutputArray::_OutputArray(UMat& m) { init(UMAT + ACCESS_WRITE, &m); } +inline _OutputArray::_OutputArray(std::vector& vec) { init(STD_VECTOR_UMAT + ACCESS_WRITE, &vec); } template inline _OutputArray::_OutputArray(std::vector<_Tp>& vec) -{ init(FIXED_TYPE + STD_VECTOR + DataType<_Tp>::type + ACCESS_WRITE, &vec); } +{ init(FIXED_TYPE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); } -inline -_OutputArray::_OutputArray(std::vector&) -{ CV_Error(Error::StsUnsupportedFormat, "std::vector cannot be an output array\n"); } +template inline +_OutputArray::_OutputArray(std::array<_Tp, _Nm>& arr) +{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_WRITE, arr.data(), Size(1, _Nm)); } + +template inline +_OutputArray::_OutputArray(std::array& arr) +{ init(STD_ARRAY_MAT + ACCESS_WRITE, arr.data(), Size(1, _Nm)); } template inline _OutputArray::_OutputArray(std::vector >& vec) -{ init(FIXED_TYPE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_WRITE, &vec); } +{ init(FIXED_TYPE + STD_VECTOR_VECTOR + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); } template inline _OutputArray::_OutputArray(std::vector >& vec) -{ init(FIXED_TYPE + STD_VECTOR_MAT + DataType<_Tp>::type + ACCESS_WRITE, &vec); } +{ init(FIXED_TYPE + STD_VECTOR_MAT + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); } template inline _OutputArray::_OutputArray(Mat_<_Tp>& m) -{ init(FIXED_TYPE + MAT + DataType<_Tp>::type + ACCESS_WRITE, &m); } +{ init(FIXED_TYPE + MAT + traits::Type<_Tp>::value + ACCESS_WRITE, &m); } template inline _OutputArray::_OutputArray(Matx<_Tp, m, n>& mtx) -{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_WRITE, &mtx, Size(n, m)); } +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE, &mtx, Size(n, m)); } template inline _OutputArray::_OutputArray(_Tp* vec, int n) -{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_WRITE, vec, Size(n, 1)); } +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE, vec, Size(n, 1)); } template inline _OutputArray::_OutputArray(const std::vector<_Tp>& vec) -{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR + DataType<_Tp>::type + ACCESS_WRITE, &vec); } +{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); } + +template inline +_OutputArray::_OutputArray(const std::array<_Tp, _Nm>& arr) +{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_WRITE, arr.data(), Size(1, _Nm)); } + +template inline +_OutputArray::_OutputArray(const std::array& arr) +{ init(FIXED_SIZE + STD_ARRAY_MAT + ACCESS_WRITE, arr.data(), Size(1, _Nm)); } template inline _OutputArray::_OutputArray(const std::vector >& vec) -{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_WRITE, &vec); } +{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_VECTOR + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); } template inline _OutputArray::_OutputArray(const std::vector >& vec) -{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_MAT + DataType<_Tp>::type + ACCESS_WRITE, &vec); } +{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_MAT + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); } template inline _OutputArray::_OutputArray(const Mat_<_Tp>& m) -{ init(FIXED_TYPE + FIXED_SIZE + MAT + DataType<_Tp>::type + ACCESS_WRITE, &m); } +{ init(FIXED_TYPE + FIXED_SIZE + MAT + traits::Type<_Tp>::value + ACCESS_WRITE, &m); } template inline _OutputArray::_OutputArray(const Matx<_Tp, m, n>& mtx) -{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_WRITE, &mtx, Size(n, m)); } +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE, &mtx, Size(n, m)); } template inline _OutputArray::_OutputArray(const _Tp* vec, int n) -{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_WRITE, vec, Size(n, 1)); } +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE, vec, Size(n, 1)); } inline _OutputArray::_OutputArray(cuda::GpuMat& d_mat) { init(CUDA_GPU_MAT + ACCESS_WRITE, &d_mat); } @@ -231,10 +305,29 @@ inline _OutputArray::_OutputArray(const ogl::Buffer& buf) inline _OutputArray::_OutputArray(const cuda::HostMem& cuda_mem) { init(FIXED_TYPE + FIXED_SIZE + CUDA_HOST_MEM + ACCESS_WRITE, &cuda_mem); } +template inline +_OutputArray _OutputArray::rawOut(std::vector<_Tp>& vec) +{ + _OutputArray v; + v.flags = _InputArray::FIXED_TYPE + _InputArray::STD_VECTOR + rawType<_Tp>() + ACCESS_WRITE; + v.obj = (void*)&vec; + return v; +} + +template inline +_OutputArray _OutputArray::rawOut(std::array<_Tp, _Nm>& arr) +{ + _OutputArray v; + v.flags = FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_WRITE; + v.obj = (void*)arr.data(); + v.sz = Size(1, _Nm); + return v; +} + /////////////////////////////////////////////////////////////////////////////////////////// -inline _InputOutputArray::_InputOutputArray() { init(ACCESS_RW, 0); } -inline _InputOutputArray::_InputOutputArray(int _flags, void* _obj) { init(_flags|ACCESS_RW, _obj); } +inline _InputOutputArray::_InputOutputArray() { init(0+ACCESS_RW, 0); } +inline _InputOutputArray::_InputOutputArray(int _flags, void* _obj) { init(_flags+ACCESS_RW, _obj); } inline _InputOutputArray::_InputOutputArray(Mat& m) { init(MAT+ACCESS_RW, &m); } inline _InputOutputArray::_InputOutputArray(std::vector& vec) { init(STD_VECTOR_MAT+ACCESS_RW, &vec); } inline _InputOutputArray::_InputOutputArray(UMat& m) { init(UMAT+ACCESS_RW, &m); } @@ -242,54 +335,67 @@ inline _InputOutputArray::_InputOutputArray(std::vector& vec) { init(STD_V template inline _InputOutputArray::_InputOutputArray(std::vector<_Tp>& vec) -{ init(FIXED_TYPE + STD_VECTOR + DataType<_Tp>::type + ACCESS_RW, &vec); } +{ init(FIXED_TYPE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_RW, &vec); } + +template inline +_InputOutputArray::_InputOutputArray(std::array<_Tp, _Nm>& arr) +{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_RW, arr.data(), Size(1, _Nm)); } -inline _InputOutputArray::_InputOutputArray(std::vector&) -{ CV_Error(Error::StsUnsupportedFormat, "std::vector cannot be an input/output array\n"); } +template inline +_InputOutputArray::_InputOutputArray(std::array& arr) +{ init(STD_ARRAY_MAT + ACCESS_RW, arr.data(), Size(1, _Nm)); } template inline _InputOutputArray::_InputOutputArray(std::vector >& vec) -{ init(FIXED_TYPE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_RW, &vec); } +{ init(FIXED_TYPE + STD_VECTOR_VECTOR + traits::Type<_Tp>::value + ACCESS_RW, &vec); } template inline _InputOutputArray::_InputOutputArray(std::vector >& vec) -{ init(FIXED_TYPE + STD_VECTOR_MAT + DataType<_Tp>::type + ACCESS_RW, &vec); } +{ init(FIXED_TYPE + STD_VECTOR_MAT + traits::Type<_Tp>::value + ACCESS_RW, &vec); } template inline _InputOutputArray::_InputOutputArray(Mat_<_Tp>& m) -{ init(FIXED_TYPE + MAT + DataType<_Tp>::type + ACCESS_RW, &m); } +{ init(FIXED_TYPE + MAT + traits::Type<_Tp>::value + ACCESS_RW, &m); } template inline _InputOutputArray::_InputOutputArray(Matx<_Tp, m, n>& mtx) -{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_RW, &mtx, Size(n, m)); } +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW, &mtx, Size(n, m)); } template inline _InputOutputArray::_InputOutputArray(_Tp* vec, int n) -{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_RW, vec, Size(n, 1)); } +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW, vec, Size(n, 1)); } template inline _InputOutputArray::_InputOutputArray(const std::vector<_Tp>& vec) -{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR + DataType<_Tp>::type + ACCESS_RW, &vec); } +{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_RW, &vec); } + +template inline +_InputOutputArray::_InputOutputArray(const std::array<_Tp, _Nm>& arr) +{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_RW, arr.data(), Size(1, _Nm)); } + +template inline +_InputOutputArray::_InputOutputArray(const std::array& arr) +{ init(FIXED_SIZE + STD_ARRAY_MAT + ACCESS_RW, arr.data(), Size(1, _Nm)); } template inline _InputOutputArray::_InputOutputArray(const std::vector >& vec) -{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_RW, &vec); } +{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_VECTOR + traits::Type<_Tp>::value + ACCESS_RW, &vec); } template inline _InputOutputArray::_InputOutputArray(const std::vector >& vec) -{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_MAT + DataType<_Tp>::type + ACCESS_RW, &vec); } +{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_MAT + traits::Type<_Tp>::value + ACCESS_RW, &vec); } template inline _InputOutputArray::_InputOutputArray(const Mat_<_Tp>& m) -{ init(FIXED_TYPE + FIXED_SIZE + MAT + DataType<_Tp>::type + ACCESS_RW, &m); } +{ init(FIXED_TYPE + FIXED_SIZE + MAT + traits::Type<_Tp>::value + ACCESS_RW, &m); } template inline _InputOutputArray::_InputOutputArray(const Matx<_Tp, m, n>& mtx) -{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_RW, &mtx, Size(n, m)); } +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW, &mtx, Size(n, m)); } template inline _InputOutputArray::_InputOutputArray(const _Tp* vec, int n) -{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_RW, vec, Size(n, 1)); } +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW, vec, Size(n, 1)); } inline _InputOutputArray::_InputOutputArray(cuda::GpuMat& d_mat) { init(CUDA_GPU_MAT + ACCESS_RW, &d_mat); } @@ -314,8 +420,12 @@ inline _InputOutputArray::_InputOutputArray(const std::vector& vec) inline _InputOutputArray::_InputOutputArray(const cuda::GpuMat& d_mat) { init(FIXED_TYPE + FIXED_SIZE + CUDA_GPU_MAT + ACCESS_RW, &d_mat); } + inline _InputOutputArray::_InputOutputArray(const std::vector& d_mat) -{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_CUDA_GPU_MAT + ACCESS_RW, &d_mat);} +{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_CUDA_GPU_MAT + ACCESS_RW, &d_mat);} + +template<> inline _InputOutputArray::_InputOutputArray(std::vector& d_mat) +{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_CUDA_GPU_MAT + ACCESS_RW, &d_mat);} inline _InputOutputArray::_InputOutputArray(const ogl::Buffer& buf) { init(FIXED_TYPE + FIXED_SIZE + OPENGL_BUFFER + ACCESS_RW, &buf); } @@ -323,18 +433,44 @@ inline _InputOutputArray::_InputOutputArray(const ogl::Buffer& buf) inline _InputOutputArray::_InputOutputArray(const cuda::HostMem& cuda_mem) { init(FIXED_TYPE + FIXED_SIZE + CUDA_HOST_MEM + ACCESS_RW, &cuda_mem); } +template inline +_InputOutputArray _InputOutputArray::rawInOut(std::vector<_Tp>& vec) +{ + _InputOutputArray v; + v.flags = _InputArray::FIXED_TYPE + _InputArray::STD_VECTOR + rawType<_Tp>() + ACCESS_RW; + v.obj = (void*)&vec; + return v; +} + +template inline +_InputOutputArray _InputOutputArray::rawInOut(std::array<_Tp, _Nm>& arr) +{ + _InputOutputArray v; + v.flags = FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_RW; + v.obj = (void*)arr.data(); + v.sz = Size(1, _Nm); + return v; +} + + +template static inline _InputArray rawIn(_Tp& v) { return _InputArray::rawIn(v); } +template static inline _OutputArray rawOut(_Tp& v) { return _OutputArray::rawOut(v); } +template static inline _InputOutputArray rawInOut(_Tp& v) { return _InputOutputArray::rawInOut(v); } + +CV__DEBUG_NS_END + //////////////////////////////////////////// Mat ////////////////////////////////////////// inline Mat::Mat() : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), - datalimit(0), allocator(0), u(0), size(&rows) + datalimit(0), allocator(0), u(0), size(&rows), step(0) {} inline Mat::Mat(int _rows, int _cols, int _type) : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), - datalimit(0), allocator(0), u(0), size(&rows) + datalimit(0), allocator(0), u(0), size(&rows), step(0) { create(_rows, _cols, _type); } @@ -342,7 +478,7 @@ Mat::Mat(int _rows, int _cols, int _type) inline Mat::Mat(int _rows, int _cols, int _type, const Scalar& _s) : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), - datalimit(0), allocator(0), u(0), size(&rows) + datalimit(0), allocator(0), u(0), size(&rows), step(0) { create(_rows, _cols, _type); *this = _s; @@ -351,7 +487,7 @@ Mat::Mat(int _rows, int _cols, int _type, const Scalar& _s) inline Mat::Mat(Size _sz, int _type) : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), - datalimit(0), allocator(0), u(0), size(&rows) + datalimit(0), allocator(0), u(0), size(&rows), step(0) { create( _sz.height, _sz.width, _type ); } @@ -359,7 +495,7 @@ Mat::Mat(Size _sz, int _type) inline Mat::Mat(Size _sz, int _type, const Scalar& _s) : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), - datalimit(0), allocator(0), u(0), size(&rows) + datalimit(0), allocator(0), u(0), size(&rows), step(0) { create(_sz.height, _sz.width, _type); *this = _s; @@ -368,7 +504,7 @@ Mat::Mat(Size _sz, int _type, const Scalar& _s) inline Mat::Mat(int _dims, const int* _sz, int _type) : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), - datalimit(0), allocator(0), u(0), size(&rows) + datalimit(0), allocator(0), u(0), size(&rows), step(0) { create(_dims, _sz, _type); } @@ -376,17 +512,34 @@ Mat::Mat(int _dims, const int* _sz, int _type) inline Mat::Mat(int _dims, const int* _sz, int _type, const Scalar& _s) : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), - datalimit(0), allocator(0), u(0), size(&rows) + datalimit(0), allocator(0), u(0), size(&rows), step(0) { create(_dims, _sz, _type); *this = _s; } +inline +Mat::Mat(const std::vector& _sz, int _type) + : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), + datalimit(0), allocator(0), u(0), size(&rows), step(0) +{ + create(_sz, _type); +} + +inline +Mat::Mat(const std::vector& _sz, int _type, const Scalar& _s) + : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), + datalimit(0), allocator(0), u(0), size(&rows), step(0) +{ + create(_sz, _type); + *this = _s; +} + inline Mat::Mat(const Mat& m) : flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), data(m.data), datastart(m.datastart), dataend(m.dataend), datalimit(m.datalimit), allocator(m.allocator), - u(m.u), size(&rows) + u(m.u), size(&rows), step(0) { if( u ) CV_XADD(&u->refcount, 1); @@ -414,24 +567,20 @@ Mat::Mat(int _rows, int _cols, int _type, void* _data, size_t _step) if( _step == AUTO_STEP ) { _step = minstep; - flags |= CONTINUOUS_FLAG; } else { - if( rows == 1 ) _step = minstep; CV_DbgAssert( _step >= minstep ); - if (_step % esz1 != 0) { CV_Error(Error::BadStep, "Step must be a multiple of esz1"); } - - flags |= _step == minstep ? CONTINUOUS_FLAG : 0; } step[0] = _step; step[1] = esz; datalimit = datastart + _step * rows; dataend = datalimit - _step + minstep; + updateContinuityFlag(); } inline @@ -447,30 +596,27 @@ Mat::Mat(Size _sz, int _type, void* _data, size_t _step) if( _step == AUTO_STEP ) { _step = minstep; - flags |= CONTINUOUS_FLAG; } else { - if( rows == 1 ) _step = minstep; CV_DbgAssert( _step >= minstep ); if (_step % esz1 != 0) { CV_Error(Error::BadStep, "Step must be a multiple of esz1"); } - - flags |= _step == minstep ? CONTINUOUS_FLAG : 0; } step[0] = _step; step[1] = esz; datalimit = datastart + _step*rows; dataend = datalimit - _step + minstep; + updateContinuityFlag(); } template inline Mat::Mat(const std::vector<_Tp>& vec, bool copyData) - : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()), - cols(1), data(0), datastart(0), dataend(0), allocator(0), u(0), size(&rows) + : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()), + cols(1), data(0), datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0) { if(vec.empty()) return; @@ -481,13 +627,50 @@ Mat::Mat(const std::vector<_Tp>& vec, bool copyData) datalimit = dataend = datastart + rows * step[0]; } else - Mat((int)vec.size(), 1, DataType<_Tp>::type, (uchar*)&vec[0]).copyTo(*this); + Mat((int)vec.size(), 1, traits::Type<_Tp>::value, (uchar*)&vec[0]).copyTo(*this); +} + +template inline +Mat::Mat(const std::initializer_list<_Tp> list) + : Mat() +{ + CV_Assert(list.size() != 0); + Mat((int)list.size(), 1, traits::Type<_Tp>::value, (uchar*)list.begin()).copyTo(*this); +} + +template inline +Mat::Mat(const std::initializer_list sizes, const std::initializer_list<_Tp> list) + : Mat() +{ + size_t size_total = 1; + for(auto s : sizes) + size_total *= s; + CV_Assert(list.size() != 0); + CV_Assert(size_total == list.size()); + Mat((int)sizes.size(), (int*)sizes.begin(), traits::Type<_Tp>::value, (uchar*)list.begin()).copyTo(*this); +} + +template inline +Mat::Mat(const std::array<_Tp, _Nm>& arr, bool copyData) + : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows((int)arr.size()), + cols(1), data(0), datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0) +{ + if(arr.empty()) + return; + if( !copyData ) + { + step[0] = step[1] = sizeof(_Tp); + datastart = data = (uchar*)arr.data(); + datalimit = dataend = datastart + rows * step[0]; + } + else + Mat((int)arr.size(), 1, traits::Type<_Tp>::value, (uchar*)arr.data()).copyTo(*this); } template inline Mat::Mat(const Vec<_Tp, n>& vec, bool copyData) - : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows(n), cols(1), data(0), - datastart(0), dataend(0), allocator(0), u(0), size(&rows) + : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows(n), cols(1), data(0), + datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0) { if( !copyData ) { @@ -496,14 +679,14 @@ Mat::Mat(const Vec<_Tp, n>& vec, bool copyData) datalimit = dataend = datastart + rows * step[0]; } else - Mat(n, 1, DataType<_Tp>::type, (void*)vec.val).copyTo(*this); + Mat(n, 1, traits::Type<_Tp>::value, (void*)vec.val).copyTo(*this); } template inline Mat::Mat(const Matx<_Tp,m,n>& M, bool copyData) - : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows(m), cols(n), data(0), - datastart(0), dataend(0), allocator(0), u(0), size(&rows) + : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows(m), cols(n), data(0), + datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0) { if( !copyData ) { @@ -513,13 +696,13 @@ Mat::Mat(const Matx<_Tp,m,n>& M, bool copyData) datalimit = dataend = datastart + rows * step[0]; } else - Mat(m, n, DataType<_Tp>::type, (uchar*)M.val).copyTo(*this); + Mat(m, n, traits::Type<_Tp>::value, (uchar*)M.val).copyTo(*this); } template inline Mat::Mat(const Point_<_Tp>& pt, bool copyData) - : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows(2), cols(1), data(0), - datastart(0), dataend(0), allocator(0), u(0), size(&rows) + : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows(2), cols(1), data(0), + datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0) { if( !copyData ) { @@ -529,7 +712,7 @@ Mat::Mat(const Point_<_Tp>& pt, bool copyData) } else { - create(2, 1, DataType<_Tp>::type); + create(2, 1, traits::Type<_Tp>::value); ((_Tp*)data)[0] = pt.x; ((_Tp*)data)[1] = pt.y; } @@ -537,8 +720,8 @@ Mat::Mat(const Point_<_Tp>& pt, bool copyData) template inline Mat::Mat(const Point3_<_Tp>& pt, bool copyData) - : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows(3), cols(1), data(0), - datastart(0), dataend(0), allocator(0), u(0), size(&rows) + : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows(3), cols(1), data(0), + datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0) { if( !copyData ) { @@ -548,7 +731,7 @@ Mat::Mat(const Point3_<_Tp>& pt, bool copyData) } else { - create(3, 1, DataType<_Tp>::type); + create(3, 1, traits::Type<_Tp>::value); ((_Tp*)data)[0] = pt.x; ((_Tp*)data)[1] = pt.y; ((_Tp*)data)[2] = pt.z; @@ -557,7 +740,7 @@ Mat::Mat(const Point3_<_Tp>& pt, bool copyData) template inline Mat::Mat(const MatCommaInitializer_<_Tp>& commaInitializer) - : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(0), rows(0), cols(0), data(0), + : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), allocator(0), u(0), size(&rows) { *this = commaInitializer.operator Mat_<_Tp>(); @@ -676,7 +859,8 @@ void Mat::addref() CV_XADD(&u->refcount, 1); } -inline void Mat::release() +inline +void Mat::release() { if( u && CV_XADD(&u->refcount, -1) == 1 ) deallocate(); @@ -684,6 +868,16 @@ inline void Mat::release() datastart = dataend = datalimit = data = 0; for(int i = 0; i < dims; i++) size.p[i] = 0; +#ifdef _DEBUG + flags = MAGIC_VAL; + dims = rows = cols = 0; + if(step.p != step.buf) + { + fastFree(step.p); + step.p = step.buf; + size.p = &rows; + } +#endif } inline @@ -704,6 +898,12 @@ Mat Mat::operator()(const Range* ranges) const return Mat(*this, ranges); } +inline +Mat Mat::operator()(const std::vector& ranges) const +{ + return Mat(*this, ranges); +} + inline bool Mat::isContinuous() const { @@ -719,7 +919,9 @@ bool Mat::isSubmatrix() const inline size_t Mat::elemSize() const { - return dims > 0 ? step.p[dims - 1] : 0; + size_t res = dims > 0 ? step.p[dims - 1] : 0; + CV_DbgAssert(res != 0); + return res; } inline @@ -755,7 +957,7 @@ size_t Mat::step1(int i) const inline bool Mat::empty() const { - return data == 0 || total() == 0; + return data == 0 || total() == 0 || dims == 0; } inline @@ -769,6 +971,17 @@ size_t Mat::total() const return p; } +inline +size_t Mat::total(int startDim, int endDim) const +{ + CV_Assert( 0 <= startDim && startDim <= endDim); + size_t p = 1; + int endDim_ = endDim <= dims ? endDim : dims; + for( int i = startDim; i < endDim_; i++ ) + p *= size[i]; + return p; +} + inline uchar* Mat::ptr(int y) { @@ -793,7 +1006,7 @@ _Tp* Mat::ptr(int y) template inline const _Tp* Mat::ptr(int y) const { - CV_DbgAssert( y == 0 || (data && dims >= 1 && data && (unsigned)y < (unsigned)size.p[0]) ); + CV_DbgAssert( y == 0 || (data && dims >= 1 && (unsigned)y < (unsigned)size.p[0]) ); return (const _Tp*)(data + step.p[0] * y); } @@ -909,6 +1122,34 @@ const uchar* Mat::ptr(const int* idx) const return p; } +template inline +_Tp* Mat::ptr(const int* idx) +{ + int i, d = dims; + uchar* p = data; + CV_DbgAssert( d >= 1 && p ); + for( i = 0; i < d; i++ ) + { + CV_DbgAssert( (unsigned)idx[i] < (unsigned)size.p[i] ); + p += idx[i] * step.p[i]; + } + return (_Tp*)p; +} + +template inline +const _Tp* Mat::ptr(const int* idx) const +{ + int i, d = dims; + uchar* p = data; + CV_DbgAssert( d >= 1 && p ); + for( i = 0; i < d; i++ ) + { + CV_DbgAssert( (unsigned)idx[i] < (unsigned)size.p[i] ); + p += idx[i] * step.p[i]; + } + return (const _Tp*)p; +} + template inline _Tp& Mat::at(int i0, int i1) { @@ -916,7 +1157,7 @@ _Tp& Mat::at(int i0, int i1) CV_DbgAssert(data); CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); CV_DbgAssert((unsigned)(i1 * DataType<_Tp>::channels) < (unsigned)(size.p[1] * channels())); - CV_DbgAssert(CV_ELEM_SIZE1(DataType<_Tp>::depth) == elemSize1()); + CV_DbgAssert(CV_ELEM_SIZE1(traits::Depth<_Tp>::value) == elemSize1()); return ((_Tp*)(data + step.p[0] * i0))[i1]; } @@ -927,7 +1168,7 @@ const _Tp& Mat::at(int i0, int i1) const CV_DbgAssert(data); CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); CV_DbgAssert((unsigned)(i1 * DataType<_Tp>::channels) < (unsigned)(size.p[1] * channels())); - CV_DbgAssert(CV_ELEM_SIZE1(DataType<_Tp>::depth) == elemSize1()); + CV_DbgAssert(CV_ELEM_SIZE1(traits::Depth<_Tp>::value) == elemSize1()); return ((const _Tp*)(data + step.p[0] * i0))[i1]; } @@ -938,7 +1179,7 @@ _Tp& Mat::at(Point pt) CV_DbgAssert(data); CV_DbgAssert((unsigned)pt.y < (unsigned)size.p[0]); CV_DbgAssert((unsigned)(pt.x * DataType<_Tp>::channels) < (unsigned)(size.p[1] * channels())); - CV_DbgAssert(CV_ELEM_SIZE1(DataType<_Tp>::depth) == elemSize1()); + CV_DbgAssert(CV_ELEM_SIZE1(traits::Depth<_Tp>::value) == elemSize1()); return ((_Tp*)(data + step.p[0] * pt.y))[pt.x]; } @@ -949,7 +1190,7 @@ const _Tp& Mat::at(Point pt) const CV_DbgAssert(data); CV_DbgAssert((unsigned)pt.y < (unsigned)size.p[0]); CV_DbgAssert((unsigned)(pt.x * DataType<_Tp>::channels) < (unsigned)(size.p[1] * channels())); - CV_DbgAssert(CV_ELEM_SIZE1(DataType<_Tp>::depth) == elemSize1()); + CV_DbgAssert(CV_ELEM_SIZE1(traits::Depth<_Tp>::value) == elemSize1()); return ((const _Tp*)(data + step.p[0] * pt.y))[pt.x]; } @@ -959,7 +1200,7 @@ _Tp& Mat::at(int i0) CV_DbgAssert(dims <= 2); CV_DbgAssert(data); CV_DbgAssert((unsigned)i0 < (unsigned)(size.p[0] * size.p[1])); - CV_DbgAssert(elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type)); + CV_DbgAssert(elemSize() == sizeof(_Tp)); if( isContinuous() || size.p[0] == 1 ) return ((_Tp*)data)[i0]; if( size.p[1] == 1 ) @@ -974,7 +1215,7 @@ const _Tp& Mat::at(int i0) const CV_DbgAssert(dims <= 2); CV_DbgAssert(data); CV_DbgAssert((unsigned)i0 < (unsigned)(size.p[0] * size.p[1])); - CV_DbgAssert(elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type)); + CV_DbgAssert(elemSize() == sizeof(_Tp)); if( isContinuous() || size.p[0] == 1 ) return ((const _Tp*)data)[i0]; if( size.p[1] == 1 ) @@ -986,48 +1227,50 @@ const _Tp& Mat::at(int i0) const template inline _Tp& Mat::at(int i0, int i1, int i2) { - CV_DbgAssert( elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) ); + CV_DbgAssert( elemSize() == sizeof(_Tp) ); return *(_Tp*)ptr(i0, i1, i2); } template inline const _Tp& Mat::at(int i0, int i1, int i2) const { - CV_DbgAssert( elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) ); + CV_DbgAssert( elemSize() == sizeof(_Tp) ); return *(const _Tp*)ptr(i0, i1, i2); } template inline _Tp& Mat::at(const int* idx) { - CV_DbgAssert( elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) ); + CV_DbgAssert( elemSize() == sizeof(_Tp) ); return *(_Tp*)ptr(idx); } template inline const _Tp& Mat::at(const int* idx) const { - CV_DbgAssert( elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) ); + CV_DbgAssert( elemSize() == sizeof(_Tp) ); return *(const _Tp*)ptr(idx); } template inline _Tp& Mat::at(const Vec& idx) { - CV_DbgAssert( elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) ); + CV_DbgAssert( elemSize() == sizeof(_Tp) ); return *(_Tp*)ptr(idx.val); } template inline const _Tp& Mat::at(const Vec& idx) const { - CV_DbgAssert( elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) ); + CV_DbgAssert( elemSize() == sizeof(_Tp) ); return *(const _Tp*)ptr(idx.val); } template inline MatConstIterator_<_Tp> Mat::begin() const { + if (empty()) + return MatConstIterator_<_Tp>(); CV_DbgAssert( elemSize() == sizeof(_Tp) ); return MatConstIterator_<_Tp>((const Mat_<_Tp>*)this); } @@ -1035,6 +1278,8 @@ MatConstIterator_<_Tp> Mat::begin() const template inline MatConstIterator_<_Tp> Mat::end() const { + if (empty()) + return MatConstIterator_<_Tp>(); CV_DbgAssert( elemSize() == sizeof(_Tp) ); MatConstIterator_<_Tp> it((const Mat_<_Tp>*)this); it += total(); @@ -1044,6 +1289,8 @@ MatConstIterator_<_Tp> Mat::end() const template inline MatIterator_<_Tp> Mat::begin() { + if (empty()) + return MatIterator_<_Tp>(); CV_DbgAssert( elemSize() == sizeof(_Tp) ); return MatIterator_<_Tp>((Mat_<_Tp>*)this); } @@ -1051,6 +1298,8 @@ MatIterator_<_Tp> Mat::begin() template inline MatIterator_<_Tp> Mat::end() { + if (empty()) + return MatIterator_<_Tp>(); CV_DbgAssert( elemSize() == sizeof(_Tp) ); MatIterator_<_Tp> it((Mat_<_Tp>*)this); it += total(); @@ -1065,7 +1314,7 @@ void Mat::forEach(const Functor& operation) { template inline void Mat::forEach(const Functor& operation) const { // call as not const - (const_cast(this))->forEach(operation); + (const_cast(this))->forEach<_Tp>(operation); } template inline @@ -1076,16 +1325,24 @@ Mat::operator std::vector<_Tp>() const return v; } +template inline +Mat::operator std::array<_Tp, _Nm>() const +{ + std::array<_Tp, _Nm> v; + copyTo(v); + return v; +} + template inline Mat::operator Vec<_Tp, n>() const { CV_Assert( data && dims <= 2 && (rows == 1 || cols == 1) && rows + cols - 1 == n && channels() == 1 ); - if( isContinuous() && type() == DataType<_Tp>::type ) + if( isContinuous() && type() == traits::Type<_Tp>::value ) return Vec<_Tp, n>((_Tp*)data); Vec<_Tp, n> v; - Mat tmp(rows, cols, DataType<_Tp>::type, v.val); + Mat tmp(rows, cols, traits::Type<_Tp>::value, v.val); convertTo(tmp, tmp.type()); return v; } @@ -1095,10 +1352,10 @@ Mat::operator Matx<_Tp, m, n>() const { CV_Assert( data && dims <= 2 && rows == m && cols == n && channels() == 1 ); - if( isContinuous() && type() == DataType<_Tp>::type ) + if( isContinuous() && type() == traits::Type<_Tp>::value ) return Matx<_Tp, m, n>((_Tp*)data); Matx<_Tp, m, n> mtx; - Mat tmp(rows, cols, DataType<_Tp>::type, mtx.val); + Mat tmp(rows, cols, traits::Type<_Tp>::value, mtx.val); convertTo(tmp, tmp.type()); return mtx; } @@ -1108,10 +1365,10 @@ void Mat::push_back(const _Tp& elem) { if( !data ) { - *this = Mat(1, 1, DataType<_Tp>::type, (void*)&elem).clone(); + *this = Mat(1, 1, traits::Type<_Tp>::value, (void*)&elem).clone(); return; } - CV_Assert(DataType<_Tp>::type == type() && cols == 1 + CV_Assert(traits::Type<_Tp>::value == type() && cols == 1 /* && dims == 2 (cols == 1 implies dims == 2) */); const uchar* tmp = dataend + step[0]; if( !isSubmatrix() && isContinuous() && tmp <= datalimit ) @@ -1135,7 +1392,12 @@ void Mat::push_back(const MatExpr& expr) push_back(static_cast(expr)); } -#ifdef CV_CXX_MOVE_SEMANTICS + +template inline +void Mat::push_back(const std::vector<_Tp>& v) +{ + push_back(Mat(v)); +} inline Mat::Mat(Mat&& m) @@ -1165,6 +1427,9 @@ Mat::Mat(Mat&& m) inline Mat& Mat::operator = (Mat&& m) { + if (this == &m) + return *this; + release(); flags = m.flags; dims = m.dims; rows = m.rows; cols = m.cols; data = m.data; datastart = m.datastart; dataend = m.dataend; datalimit = m.datalimit; allocator = m.allocator; @@ -1195,8 +1460,6 @@ Mat& Mat::operator = (Mat&& m) return *this; } -#endif - ///////////////////////////// MatSize //////////////////////////// @@ -1204,22 +1467,36 @@ inline MatSize::MatSize(int* _p) : p(_p) {} +inline +int MatSize::dims() const +{ + return (p - 1)[0]; +} + inline Size MatSize::operator()() const { - CV_DbgAssert(p[-1] <= 2); + CV_DbgAssert(dims() <= 2); return Size(p[1], p[0]); } inline const int& MatSize::operator[](int i) const { + CV_DbgAssert(i < dims()); +#ifdef __OPENCV_BUILD + CV_DbgAssert(i >= 0); +#endif return p[i]; } inline int& MatSize::operator[](int i) { + CV_DbgAssert(i < dims()); +#ifdef __OPENCV_BUILD + CV_DbgAssert(i >= 0); +#endif return p[i]; } @@ -1232,8 +1509,8 @@ MatSize::operator const int*() const inline bool MatSize::operator == (const MatSize& sz) const { - int d = p[-1]; - int dsz = sz.p[-1]; + int d = dims(); + int dsz = sz.dims(); if( d != dsz ) return false; if( d == 2 ) @@ -1300,47 +1577,47 @@ template inline Mat_<_Tp>::Mat_() : Mat() { - flags = (flags & ~CV_MAT_TYPE_MASK) | DataType<_Tp>::type; + flags = (flags & ~CV_MAT_TYPE_MASK) + traits::Type<_Tp>::value; } template inline Mat_<_Tp>::Mat_(int _rows, int _cols) - : Mat(_rows, _cols, DataType<_Tp>::type) + : Mat(_rows, _cols, traits::Type<_Tp>::value) { } template inline Mat_<_Tp>::Mat_(int _rows, int _cols, const _Tp& value) - : Mat(_rows, _cols, DataType<_Tp>::type) + : Mat(_rows, _cols, traits::Type<_Tp>::value) { *this = value; } template inline Mat_<_Tp>::Mat_(Size _sz) - : Mat(_sz.height, _sz.width, DataType<_Tp>::type) + : Mat(_sz.height, _sz.width, traits::Type<_Tp>::value) {} template inline Mat_<_Tp>::Mat_(Size _sz, const _Tp& value) - : Mat(_sz.height, _sz.width, DataType<_Tp>::type) + : Mat(_sz.height, _sz.width, traits::Type<_Tp>::value) { *this = value; } template inline Mat_<_Tp>::Mat_(int _dims, const int* _sz) - : Mat(_dims, _sz, DataType<_Tp>::type) + : Mat(_dims, _sz, traits::Type<_Tp>::value) {} template inline Mat_<_Tp>::Mat_(int _dims, const int* _sz, const _Tp& _s) - : Mat(_dims, _sz, DataType<_Tp>::type, Scalar(_s)) + : Mat(_dims, _sz, traits::Type<_Tp>::value, Scalar(_s)) {} template inline Mat_<_Tp>::Mat_(int _dims, const int* _sz, _Tp* _data, const size_t* _steps) - : Mat(_dims, _sz, DataType<_Tp>::type, _data, _steps) + : Mat(_dims, _sz, traits::Type<_Tp>::value, _data, _steps) {} template inline @@ -1348,11 +1625,16 @@ Mat_<_Tp>::Mat_(const Mat_<_Tp>& m, const Range* ranges) : Mat(m, ranges) {} +template inline +Mat_<_Tp>::Mat_(const Mat_<_Tp>& m, const std::vector& ranges) + : Mat(m, ranges) +{} + template inline Mat_<_Tp>::Mat_(const Mat& m) : Mat() { - flags = (flags & ~CV_MAT_TYPE_MASK) | DataType<_Tp>::type; + flags = (flags & ~CV_MAT_TYPE_MASK) + traits::Type<_Tp>::value; *this = m; } @@ -1363,7 +1645,7 @@ Mat_<_Tp>::Mat_(const Mat_& m) template inline Mat_<_Tp>::Mat_(int _rows, int _cols, _Tp* _data, size_t steps) - : Mat(_rows, _cols, DataType<_Tp>::type, _data, steps) + : Mat(_rows, _cols, traits::Type<_Tp>::value, _data, steps) {} template inline @@ -1378,7 +1660,7 @@ Mat_<_Tp>::Mat_(const Mat_& m, const Rect& roi) template template inline Mat_<_Tp>::Mat_(const Vec::channel_type, n>& vec, bool copyData) - : Mat(n / DataType<_Tp>::channels, 1, DataType<_Tp>::type, (void*)&vec) + : Mat(n / DataType<_Tp>::channels, 1, traits::Type<_Tp>::value, (void*)&vec) { CV_Assert(n%DataType<_Tp>::channels == 0); if( copyData ) @@ -1387,7 +1669,7 @@ Mat_<_Tp>::Mat_(const Vec::channel_type, n>& vec, bool co template template inline Mat_<_Tp>::Mat_(const Matx::channel_type, m, n>& M, bool copyData) - : Mat(m, n / DataType<_Tp>::channels, DataType<_Tp>::type, (void*)&M) + : Mat(m, n / DataType<_Tp>::channels, traits::Type<_Tp>::value, (void*)&M) { CV_Assert(n % DataType<_Tp>::channels == 0); if( copyData ) @@ -1396,7 +1678,7 @@ Mat_<_Tp>::Mat_(const Matx::channel_type, m, n>& M, bool template inline Mat_<_Tp>::Mat_(const Point_::channel_type>& pt, bool copyData) - : Mat(2 / DataType<_Tp>::channels, 1, DataType<_Tp>::type, (void*)&pt) + : Mat(2 / DataType<_Tp>::channels, 1, traits::Type<_Tp>::value, (void*)&pt) { CV_Assert(2 % DataType<_Tp>::channels == 0); if( copyData ) @@ -1405,7 +1687,7 @@ Mat_<_Tp>::Mat_(const Point_::channel_type>& pt, bool cop template inline Mat_<_Tp>::Mat_(const Point3_::channel_type>& pt, bool copyData) - : Mat(3 / DataType<_Tp>::channels, 1, DataType<_Tp>::type, (void*)&pt) + : Mat(3 / DataType<_Tp>::channels, 1, traits::Type<_Tp>::value, (void*)&pt) { CV_Assert(3 % DataType<_Tp>::channels == 0); if( copyData ) @@ -1422,19 +1704,39 @@ Mat_<_Tp>::Mat_(const std::vector<_Tp>& vec, bool copyData) : Mat(vec, copyData) {} +template inline +Mat_<_Tp>::Mat_(std::initializer_list<_Tp> list) + : Mat(list) +{} + +template inline +Mat_<_Tp>::Mat_(const std::initializer_list sizes, std::initializer_list<_Tp> list) + : Mat(sizes, list) +{} + +template template inline +Mat_<_Tp>::Mat_(const std::array<_Tp, _Nm>& arr, bool copyData) + : Mat(arr, copyData) +{} + template inline Mat_<_Tp>& Mat_<_Tp>::operator = (const Mat& m) { - if( DataType<_Tp>::type == m.type() ) + if (m.empty()) + { + release(); + return *this; + } + if( traits::Type<_Tp>::value == m.type() ) { Mat::operator = (m); return *this; } - if( DataType<_Tp>::depth == m.depth() ) + if( traits::Depth<_Tp>::value == m.depth() ) { return (*this = m.reshape(DataType<_Tp>::channels, m.dims, 0)); } - CV_DbgAssert(DataType<_Tp>::channels == m.channels()); + CV_Assert(DataType<_Tp>::channels == m.channels() || m.empty()); m.convertTo(*this, type()); return *this; } @@ -1457,19 +1759,28 @@ Mat_<_Tp>& Mat_<_Tp>::operator = (const _Tp& s) template inline void Mat_<_Tp>::create(int _rows, int _cols) { - Mat::create(_rows, _cols, DataType<_Tp>::type); + Mat::create(_rows, _cols, traits::Type<_Tp>::value); } template inline void Mat_<_Tp>::create(Size _sz) { - Mat::create(_sz, DataType<_Tp>::type); + Mat::create(_sz, traits::Type<_Tp>::value); } template inline void Mat_<_Tp>::create(int _dims, const int* _sz) { - Mat::create(_dims, _sz, DataType<_Tp>::type); + Mat::create(_dims, _sz, traits::Type<_Tp>::value); +} + +template inline +void Mat_<_Tp>::release() +{ + Mat::release(); +#ifdef _DEBUG + flags = (flags & ~CV_MAT_TYPE_MASK) + traits::Type<_Tp>::value; +#endif } template inline @@ -1481,7 +1792,7 @@ Mat_<_Tp> Mat_<_Tp>::cross(const Mat_& m) const template template inline Mat_<_Tp>::operator Mat_() const { - return Mat_(*this); + return Mat_(static_cast(*this)); } template inline @@ -1525,15 +1836,15 @@ size_t Mat_<_Tp>::elemSize1() const template inline int Mat_<_Tp>::type() const { - CV_DbgAssert( Mat::type() == DataType<_Tp>::type ); - return DataType<_Tp>::type; + CV_DbgAssert( Mat::type() == traits::Type<_Tp>::value ); + return traits::Type<_Tp>::value; } template inline int Mat_<_Tp>::depth() const { - CV_DbgAssert( Mat::depth() == DataType<_Tp>::depth ); - return DataType<_Tp>::depth; + CV_DbgAssert( Mat::depth() == traits::Depth<_Tp>::value ); + return traits::Depth<_Tp>::value; } template inline @@ -1579,17 +1890,23 @@ Mat_<_Tp> Mat_<_Tp>::operator()( const Range* ranges ) const return Mat_<_Tp>(*this, ranges); } +template inline +Mat_<_Tp> Mat_<_Tp>::operator()(const std::vector& ranges) const +{ + return Mat_<_Tp>(*this, ranges); +} + template inline _Tp* Mat_<_Tp>::operator [](int y) { - CV_DbgAssert( 0 <= y && y < rows ); + CV_DbgAssert( 0 <= y && y < size.p[0] ); return (_Tp*)(data + y*step.p[0]); } template inline const _Tp* Mat_<_Tp>::operator [](int y) const { - CV_DbgAssert( 0 <= y && y < rows ); + CV_DbgAssert( 0 <= y && y < size.p[0] ); return (const _Tp*)(data + y*step.p[0]); } @@ -1600,7 +1917,7 @@ _Tp& Mat_<_Tp>::operator ()(int i0, int i1) CV_DbgAssert(data); CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]); - CV_DbgAssert(type() == DataType<_Tp>::type); + CV_DbgAssert(type() == traits::Type<_Tp>::value); return ((_Tp*)(data + step.p[0] * i0))[i1]; } @@ -1611,7 +1928,7 @@ const _Tp& Mat_<_Tp>::operator ()(int i0, int i1) const CV_DbgAssert(data); CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]); - CV_DbgAssert(type() == DataType<_Tp>::type); + CV_DbgAssert(type() == traits::Type<_Tp>::value); return ((const _Tp*)(data + step.p[0] * i0))[i1]; } @@ -1622,7 +1939,7 @@ _Tp& Mat_<_Tp>::operator ()(Point pt) CV_DbgAssert(data); CV_DbgAssert((unsigned)pt.y < (unsigned)size.p[0]); CV_DbgAssert((unsigned)pt.x < (unsigned)size.p[1]); - CV_DbgAssert(type() == DataType<_Tp>::type); + CV_DbgAssert(type() == traits::Type<_Tp>::value); return ((_Tp*)(data + step.p[0] * pt.y))[pt.x]; } @@ -1633,7 +1950,7 @@ const _Tp& Mat_<_Tp>::operator ()(Point pt) const CV_DbgAssert(data); CV_DbgAssert((unsigned)pt.y < (unsigned)size.p[0]); CV_DbgAssert((unsigned)pt.x < (unsigned)size.p[1]); - CV_DbgAssert(type() == DataType<_Tp>::type); + CV_DbgAssert(type() == traits::Type<_Tp>::value); return ((const _Tp*)(data + step.p[0] * pt.y))[pt.x]; } @@ -1693,6 +2010,14 @@ Mat_<_Tp>::operator std::vector<_Tp>() const return v; } +template template inline +Mat_<_Tp>::operator std::array<_Tp, _Nm>() const +{ + std::array<_Tp, _Nm> a; + copyTo(a); + return a; +} + template template inline Mat_<_Tp>::operator Vec::channel_type, n>() const { @@ -1755,18 +2080,16 @@ void Mat_<_Tp>::forEach(const Functor& operation) const { Mat::forEach<_Tp, Functor>(operation); } -#ifdef CV_CXX_MOVE_SEMANTICS - template inline Mat_<_Tp>::Mat_(Mat_&& m) - : Mat(m) + : Mat(std::move(m)) { } template inline Mat_<_Tp>& Mat_<_Tp>::operator = (Mat_&& m) { - Mat::operator = (m); + Mat::operator = (std::move(m)); return *this; } @@ -1774,19 +2097,24 @@ template inline Mat_<_Tp>::Mat_(Mat&& m) : Mat() { - flags = (flags & ~CV_MAT_TYPE_MASK) | DataType<_Tp>::type; - *this = m; + flags = (flags & ~CV_MAT_TYPE_MASK) + traits::Type<_Tp>::value; + *this = std::move(m); } template inline Mat_<_Tp>& Mat_<_Tp>::operator = (Mat&& m) { - if( DataType<_Tp>::type == m.type() ) + if (m.empty()) + { + release(); + return *this; + } + if( traits::Type<_Tp>::value == m.type() ) { Mat::operator = ((Mat&&)m); return *this; } - if( DataType<_Tp>::depth == m.depth() ) + if( traits::Depth<_Tp>::value == m.depth() ) { Mat::operator = ((Mat&&)m.reshape(DataType<_Tp>::channels, m.dims, 0)); return *this; @@ -1800,11 +2128,10 @@ template inline Mat_<_Tp>::Mat_(MatExpr&& e) : Mat() { - flags = (flags & ~CV_MAT_TYPE_MASK) | DataType<_Tp>::type; + flags = (flags & ~CV_MAT_TYPE_MASK) + traits::Type<_Tp>::value; *this = Mat(e); } -#endif ///////////////////////////// SparseMat ///////////////////////////// @@ -2137,21 +2464,21 @@ SparseMatConstIterator_<_Tp> SparseMat::end() const template inline SparseMat_<_Tp>::SparseMat_() { - flags = MAGIC_VAL | DataType<_Tp>::type; + flags = MAGIC_VAL + traits::Type<_Tp>::value; } template inline SparseMat_<_Tp>::SparseMat_(int _dims, const int* _sizes) - : SparseMat(_dims, _sizes, DataType<_Tp>::type) + : SparseMat(_dims, _sizes, traits::Type<_Tp>::value) {} template inline SparseMat_<_Tp>::SparseMat_(const SparseMat& m) { - if( m.type() == DataType<_Tp>::type ) + if( m.type() == traits::Type<_Tp>::value ) *this = (const SparseMat_<_Tp>&)m; else - m.convertTo(*this, DataType<_Tp>::type); + m.convertTo(*this, traits::Type<_Tp>::value); } template inline @@ -2186,9 +2513,9 @@ SparseMat_<_Tp>& SparseMat_<_Tp>::operator = (const SparseMat_<_Tp>& m) template inline SparseMat_<_Tp>& SparseMat_<_Tp>::operator = (const SparseMat& m) { - if( m.type() == DataType<_Tp>::type ) + if( m.type() == traits::Type<_Tp>::value ) return (*this = (const SparseMat_<_Tp>&)m); - m.convertTo(*this, DataType<_Tp>::type); + m.convertTo(*this, traits::Type<_Tp>::value); return *this; } @@ -2209,19 +2536,19 @@ SparseMat_<_Tp> SparseMat_<_Tp>::clone() const template inline void SparseMat_<_Tp>::create(int _dims, const int* _sizes) { - SparseMat::create(_dims, _sizes, DataType<_Tp>::type); + SparseMat::create(_dims, _sizes, traits::Type<_Tp>::value); } template inline int SparseMat_<_Tp>::type() const { - return DataType<_Tp>::type; + return traits::Type<_Tp>::value; } template inline int SparseMat_<_Tp>::depth() const { - return DataType<_Tp>::depth; + return traits::Depth<_Tp>::value; } template inline @@ -2321,6 +2648,7 @@ MatConstIterator::MatConstIterator(const Mat* _m) { if( m && m->isContinuous() ) { + CV_Assert(!m->empty()); sliceStart = m->ptr(); sliceEnd = sliceStart + m->total()*elemSize; } @@ -2334,6 +2662,7 @@ MatConstIterator::MatConstIterator(const Mat* _m, int _row, int _col) CV_Assert(m && m->dims <= 2); if( m->isContinuous() ) { + CV_Assert(!m->empty()); sliceStart = m->ptr(); sliceEnd = sliceStart + m->total()*elemSize; } @@ -2348,6 +2677,7 @@ MatConstIterator::MatConstIterator(const Mat* _m, Point _pt) CV_Assert(m && m->dims <= 2); if( m->isContinuous() ) { + CV_Assert(!m->empty()); sliceStart = m->ptr(); sliceEnd = sliceStart + m->total()*elemSize; } @@ -2474,7 +2804,7 @@ ptrdiff_t operator - (const MatConstIterator& b, const MatConstIterator& a) if( a.m != b.m ) return ((size_t)(-1) >> 1); if( a.sliceEnd == b.sliceEnd ) - return (b.ptr - a.ptr)/b.elemSize; + return (b.ptr - a.ptr)/static_cast(b.elemSize); return b.lpos() - a.lpos(); } @@ -2543,7 +2873,7 @@ MatConstIterator_<_Tp>& MatConstIterator_<_Tp>::operator = (const MatConstIterat } template inline -_Tp MatConstIterator_<_Tp>::operator *() const +const _Tp& MatConstIterator_<_Tp>::operator *() const { return *(_Tp*)(this->ptr); } @@ -2649,7 +2979,7 @@ MatConstIterator_<_Tp> operator - (const MatConstIterator_<_Tp>& a, ptrdiff_t of } template inline -_Tp MatConstIterator_<_Tp>::operator [](ptrdiff_t i) const +const _Tp& MatConstIterator_<_Tp>::operator [](ptrdiff_t i) const { return *(_Tp*)MatConstIterator::operator [](i); } @@ -2922,7 +3252,7 @@ template inline SparseMatConstIterator_<_Tp>::SparseMatConstIterator_(const SparseMat* _m) : SparseMatConstIterator(_m) { - CV_Assert( _m->type() == DataType<_Tp>::type ); + CV_Assert( _m->type() == traits::Type<_Tp>::value ); } template inline @@ -3058,50 +3388,50 @@ Mat& Mat::operator = (const MatExpr& e) template inline Mat_<_Tp>::Mat_(const MatExpr& e) { - e.op->assign(e, *this, DataType<_Tp>::type); + e.op->assign(e, *this, traits::Type<_Tp>::value); } template inline Mat_<_Tp>& Mat_<_Tp>::operator = (const MatExpr& e) { - e.op->assign(e, *this, DataType<_Tp>::type); + e.op->assign(e, *this, traits::Type<_Tp>::value); return *this; } template inline MatExpr Mat_<_Tp>::zeros(int rows, int cols) { - return Mat::zeros(rows, cols, DataType<_Tp>::type); + return Mat::zeros(rows, cols, traits::Type<_Tp>::value); } template inline MatExpr Mat_<_Tp>::zeros(Size sz) { - return Mat::zeros(sz, DataType<_Tp>::type); + return Mat::zeros(sz, traits::Type<_Tp>::value); } template inline MatExpr Mat_<_Tp>::ones(int rows, int cols) { - return Mat::ones(rows, cols, DataType<_Tp>::type); + return Mat::ones(rows, cols, traits::Type<_Tp>::value); } template inline MatExpr Mat_<_Tp>::ones(Size sz) { - return Mat::ones(sz, DataType<_Tp>::type); + return Mat::ones(sz, traits::Type<_Tp>::value); } template inline MatExpr Mat_<_Tp>::eye(int rows, int cols) { - return Mat::eye(rows, cols, DataType<_Tp>::type); + return Mat::eye(rows, cols, traits::Type<_Tp>::value); } template inline MatExpr Mat_<_Tp>::eye(Size sz) { - return Mat::eye(sz, DataType<_Tp>::type); + return Mat::eye(sz, traits::Type<_Tp>::value); } inline @@ -3127,7 +3457,7 @@ template inline MatExpr::operator Mat_<_Tp>() const { Mat_<_Tp> m; - op->assign(*this, m, DataType<_Tp>::type); + op->assign(*this, m, traits::Type<_Tp>::value); return m; } @@ -3360,7 +3690,7 @@ UMat::UMat(const UMat& m) template inline UMat::UMat(const std::vector<_Tp>& vec, bool copyData) -: flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()), +: flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()), cols(1), allocator(0), usageFlags(USAGE_DEFAULT), u(0), offset(0), size(&rows) { if(vec.empty()) @@ -3371,10 +3701,9 @@ cols(1), allocator(0), usageFlags(USAGE_DEFAULT), u(0), offset(0), size(&rows) CV_Error(Error::StsNotImplemented, ""); } else - Mat((int)vec.size(), 1, DataType<_Tp>::type, (uchar*)&vec[0]).copyTo(*this); + Mat((int)vec.size(), 1, traits::Type<_Tp>::value, (uchar*)&vec[0]).copyTo(*this); } - inline UMat& UMat::operator = (const UMat& m) { @@ -3505,6 +3834,12 @@ UMat UMat::operator()(const Range* ranges) const return UMat(*this, ranges); } +inline +UMat UMat::operator()(const std::vector& ranges) const +{ + return UMat(*this, ranges); +} + inline bool UMat::isContinuous() const { @@ -3520,7 +3855,9 @@ bool UMat::isSubmatrix() const inline size_t UMat::elemSize() const { - return dims > 0 ? step.p[dims - 1] : 0; + size_t res = dims > 0 ? step.p[dims - 1] : 0; + CV_DbgAssert(res != 0); + return res; } inline @@ -3556,7 +3893,7 @@ size_t UMat::step1(int i) const inline bool UMat::empty() const { - return u == 0 || total() == 0; + return u == 0 || total() == 0 || dims == 0; } inline @@ -3570,8 +3907,6 @@ size_t UMat::total() const return p; } -#ifdef CV_CXX_MOVE_SEMANTICS - inline UMat::UMat(UMat&& m) : flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), allocator(m.allocator), @@ -3599,6 +3934,8 @@ UMat::UMat(UMat&& m) inline UMat& UMat::operator = (UMat&& m) { + if (this == &m) + return *this; release(); flags = m.flags; dims = m.dims; rows = m.rows; cols = m.cols; allocator = m.allocator; usageFlags = m.usageFlags; @@ -3630,8 +3967,6 @@ UMat& UMat::operator = (UMat&& m) return *this; } -#endif - inline bool UMatData::hostCopyObsolete() const { return (flags & HOST_COPY_OBSOLETE) != 0; } inline bool UMatData::deviceCopyObsolete() const { return (flags & DEVICE_COPY_OBSOLETE) != 0; } @@ -3663,11 +3998,17 @@ inline void UMatData::markDeviceCopyObsolete(bool flag) flags &= ~DEVICE_COPY_OBSOLETE; } -inline UMatDataAutoLock::UMatDataAutoLock(UMatData* _u) : u(_u) { u->lock(); } -inline UMatDataAutoLock::~UMatDataAutoLock() { u->unlock(); } - //! @endcond } //cv +#ifdef _MSC_VER +#pragma warning( pop ) +#endif + +#ifdef CV_DISABLE_CLANG_ENUM_WARNINGS +#undef CV_DISABLE_CLANG_ENUM_WARNINGS +#pragma clang diagnostic pop +#endif + #endif diff --git a/IPL/include/opencv/opencv2/core/matx.hpp b/IPL/include/opencv/opencv2/core/matx.hpp index e4d72f7..6a22b17 100644 --- a/IPL/include/opencv/opencv2/core/matx.hpp +++ b/IPL/include/opencv/opencv2/core/matx.hpp @@ -41,8 +41,8 @@ // //M*/ -#ifndef __OPENCV_CORE_MATX_HPP__ -#define __OPENCV_CORE_MATX_HPP__ +#ifndef OPENCV_CORE_MATX_HPP +#define OPENCV_CORE_MATX_HPP #ifndef __cplusplus # error matx.hpp header must be compiled as C++ @@ -53,6 +53,8 @@ #include "opencv2/core/traits.hpp" #include "opencv2/core/saturate.hpp" +#include + namespace cv { @@ -62,13 +64,14 @@ namespace cv ////////////////////////////// Small Matrix /////////////////////////// //! @cond IGNORED -struct CV_EXPORTS Matx_AddOp {}; -struct CV_EXPORTS Matx_SubOp {}; -struct CV_EXPORTS Matx_ScaleOp {}; -struct CV_EXPORTS Matx_MulOp {}; -struct CV_EXPORTS Matx_DivOp {}; -struct CV_EXPORTS Matx_MatMulOp {}; -struct CV_EXPORTS Matx_TOp {}; +// FIXIT Remove this (especially CV_EXPORTS modifier) +struct CV_EXPORTS Matx_AddOp { Matx_AddOp() {} Matx_AddOp(const Matx_AddOp&) {} }; +struct CV_EXPORTS Matx_SubOp { Matx_SubOp() {} Matx_SubOp(const Matx_SubOp&) {} }; +struct CV_EXPORTS Matx_ScaleOp { Matx_ScaleOp() {} Matx_ScaleOp(const Matx_ScaleOp&) {} }; +struct CV_EXPORTS Matx_MulOp { Matx_MulOp() {} Matx_MulOp(const Matx_MulOp&) {} }; +struct CV_EXPORTS Matx_DivOp { Matx_DivOp() {} Matx_DivOp(const Matx_DivOp&) {} }; +struct CV_EXPORTS Matx_MatMulOp { Matx_MatMulOp() {} Matx_MatMulOp(const Matx_MatMulOp&) {} }; +struct CV_EXPORTS Matx_TOp { Matx_TOp() {} Matx_TOp(const Matx_TOp&) {} }; //! @endcond /** @brief Template class for small matrices whose type and size are known at compilation time @@ -77,21 +80,33 @@ If you need a more flexible type, use Mat . The elements of the matrix M are acc M(i,j) notation. Most of the common matrix operations (see also @ref MatrixExpressions ) are available. To do an operation on Matx that is not implemented, you can easily convert the matrix to Mat and backwards: -@code +@code{.cpp} Matx33f m(1, 2, 3, 4, 5, 6, 7, 8, 9); cout << sum(Mat(m*m.t())) << endl; - @endcode +@endcode +Except of the plain constructor which takes a list of elements, Matx can be initialized from a C-array: +@code{.cpp} + float values[] = { 1, 2, 3}; + Matx31f m(values); +@endcode +In case if C++11 features are available, std::initializer_list can be also used to initialize Matx: +@code{.cpp} + Matx31f m = { 1, 2, 3}; +@endcode */ template class Matx { public: - enum { depth = DataType<_Tp>::depth, + enum { rows = m, cols = n, channels = rows*cols, +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + depth = traits::Type<_Tp>::value, type = CV_MAKETYPE(depth, channels), +#endif shortdim = (m < n ? m : n) }; @@ -102,7 +117,7 @@ template class Matx //! default constructor Matx(); - Matx(_Tp v0); //!< 1x1 matrix + explicit Matx(_Tp v0); //!< 1x1 matrix Matx(_Tp v0, _Tp v1); //!< 1x2 or 2x1 matrix Matx(_Tp v0, _Tp v1, _Tp v2); //!< 1x3 or 3x1 matrix Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3); //!< 1x4, 2x2 or 4x1 matrix @@ -125,6 +140,8 @@ template class Matx _Tp v12, _Tp v13, _Tp v14, _Tp v15); //!< 1x16, 4x4 or 16x1 matrix explicit Matx(const _Tp* vals); //!< initialize from a plain array + Matx(std::initializer_list<_Tp>); //!< initialize from an initializer list + static Matx all(_Tp alpha); static Matx zeros(); static Matx ones(); @@ -146,7 +163,7 @@ template class Matx template Matx<_Tp, m1, n1> reshape() const; //! extract part of the matrix - template Matx<_Tp, m1, n1> get_minor(int i, int j) const; + template Matx<_Tp, m1, n1> get_minor(int base_row, int base_col) const; //! extract the matrix row Matx<_Tp, 1, n> row(int i) const; @@ -174,8 +191,8 @@ template class Matx Matx<_Tp, m, n> div(const Matx<_Tp, m, n>& a) const; //! element access - const _Tp& operator ()(int i, int j) const; - _Tp& operator ()(int i, int j); + const _Tp& operator ()(int row, int col) const; + _Tp& operator ()(int row, int col); //! 1D element access const _Tp& operator ()(int i) const; @@ -242,13 +259,23 @@ template class DataType< Matx<_Tp, m, n> > typedef value_type vec_type; enum { generic_type = 0, - depth = DataType::depth, channels = m * n, - fmt = DataType::fmt + ((channels - 1) << 8), - type = CV_MAKETYPE(depth, channels) + fmt = traits::SafeFmt::fmt + ((channels - 1) << 8) +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + ,depth = DataType::depth + ,type = CV_MAKETYPE(depth, channels) +#endif }; }; +namespace traits { +template +struct Depth< Matx<_Tp, m, n> > { enum { value = Depth<_Tp>::value }; }; +template +struct Type< Matx<_Tp, m, n> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, n*m) }; }; +} // namespace + + /** @brief Comma-separated Matrix Initializer */ template class MatxCommaInitializer @@ -306,9 +333,13 @@ template class Vec : public Matx<_Tp, cn, 1> { public: typedef _Tp value_type; - enum { depth = Matx<_Tp, cn, 1>::depth, + enum { channels = cn, - type = CV_MAKETYPE(depth, channels) +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + depth = Matx<_Tp, cn, 1>::depth, + type = CV_MAKETYPE(depth, channels), +#endif + _dummy_enum_finalizer = 0 }; //! default constructor @@ -327,6 +358,8 @@ template class Vec : public Matx<_Tp, cn, 1> Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11, _Tp v12, _Tp v13); //!< 14-element vector constructor explicit Vec(const _Tp* values); + Vec(std::initializer_list<_Tp>); + Vec(const Vec<_Tp, cn>& v); static Vec all(_Tp alpha); @@ -352,6 +385,10 @@ template class Vec : public Matx<_Tp, cn, 1> const _Tp& operator ()(int i) const; _Tp& operator ()(int i); +#ifdef CV_CXX11 + Vec<_Tp, cn>& operator=(const Vec<_Tp, cn>& rhs) = default; +#endif + Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_AddOp); Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_SubOp); template Vec(const Matx<_Tp, cn, 1>& a, _T2 alpha, Matx_ScaleOp); @@ -401,13 +438,24 @@ template class DataType< Vec<_Tp, cn> > typedef value_type vec_type; enum { generic_type = 0, - depth = DataType::depth, channels = cn, fmt = DataType::fmt + ((channels - 1) << 8), - type = CV_MAKETYPE(depth, channels) +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + depth = DataType::depth, + type = CV_MAKETYPE(depth, channels), +#endif + _dummy_enum_finalizer = 0 }; }; +namespace traits { +template +struct Depth< Vec<_Tp, cn> > { enum { value = Depth<_Tp>::value }; }; +template +struct Type< Vec<_Tp, cn> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, cn) }; }; +} // namespace + + /** @brief Comma-separated Vec Initializer */ template class VecCommaInitializer : public MatxCommaInitializer<_Tp, m, 1> @@ -438,7 +486,7 @@ template struct Matx_DetOp return p; for( int i = 0; i < m; i++ ) p *= temp(i, i); - return 1./p; + return p; } }; @@ -590,11 +638,12 @@ Matx<_Tp,m,n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp template inline Matx<_Tp,m,n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11, _Tp v12, _Tp v13) { - CV_StaticAssert(channels == 14, "Matx should have at least 14 elements."); + CV_StaticAssert(channels >= 14, "Matx should have at least 14 elements."); val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3; val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7; val[8] = v8; val[9] = v9; val[10] = v10; val[11] = v11; val[12] = v12; val[13] = v13; + for (int i = 14; i < channels; i++) val[i] = _Tp(0); } @@ -615,6 +664,17 @@ Matx<_Tp, m, n>::Matx(const _Tp* values) for( int i = 0; i < channels; i++ ) val[i] = values[i]; } +template inline +Matx<_Tp, m, n>::Matx(std::initializer_list<_Tp> list) +{ + CV_DbgAssert(list.size() == channels); + int i = 0; + for(const auto& elem : list) + { + val[i++] = elem; + } +} + template inline Matx<_Tp, m, n> Matx<_Tp, m, n>::all(_Tp alpha) { @@ -686,13 +746,13 @@ Matx<_Tp, m1, n1> Matx<_Tp, m, n>::reshape() const template template inline -Matx<_Tp, m1, n1> Matx<_Tp, m, n>::get_minor(int i, int j) const +Matx<_Tp, m1, n1> Matx<_Tp, m, n>::get_minor(int base_row, int base_col) const { - CV_DbgAssert(0 <= i && i+m1 <= m && 0 <= j && j+n1 <= n); + CV_DbgAssert(0 <= base_row && base_row+m1 <= m && 0 <= base_col && base_col+n1 <= n); Matx<_Tp, m1, n1> s; for( int di = 0; di < m1; di++ ) for( int dj = 0; dj < n1; dj++ ) - s(di, dj) = (*this)(i+di, j+dj); + s(di, dj) = (*this)(base_row+di, base_col+dj); return s; } @@ -723,17 +783,17 @@ typename Matx<_Tp, m, n>::diag_type Matx<_Tp, m, n>::diag() const } template inline -const _Tp& Matx<_Tp, m, n>::operator()(int i, int j) const +const _Tp& Matx<_Tp, m, n>::operator()(int row_idx, int col_idx) const { - CV_DbgAssert( (unsigned)i < (unsigned)m && (unsigned)j < (unsigned)n ); - return this->val[i*n + j]; + CV_DbgAssert( (unsigned)row_idx < (unsigned)m && (unsigned)col_idx < (unsigned)n ); + return this->val[row_idx*n + col_idx]; } template inline -_Tp& Matx<_Tp, m, n>::operator ()(int i, int j) +_Tp& Matx<_Tp, m, n>::operator ()(int row_idx, int col_idx) { - CV_DbgAssert( (unsigned)i < (unsigned)m && (unsigned)j < (unsigned)n ); - return val[i*n + j]; + CV_DbgAssert( (unsigned)row_idx < (unsigned)m && (unsigned)col_idx < (unsigned)n ); + return val[row_idx*n + col_idx]; } template inline @@ -956,6 +1016,10 @@ template inline Vec<_Tp, cn>::Vec(const _Tp* values) : Matx<_Tp, cn, 1>(values) {} +template inline +Vec<_Tp, cn>::Vec(std::initializer_list<_Tp> list) + : Matx<_Tp, cn, 1>(list) {} + template inline Vec<_Tp, cn>::Vec(const Vec<_Tp, cn>& m) : Matx<_Tp, cn, 1>(m.val) {} @@ -1080,7 +1144,7 @@ Vec<_Tp, cn> normalize(const Vec<_Tp, cn>& v) -//////////////////////////////// matx comma initializer ////////////////////////////////// +//////////////////////////////// vec comma initializer ////////////////////////////////// template static inline @@ -1205,6 +1269,34 @@ Matx<_Tp, m, n> operator * (double alpha, const Matx<_Tp, m, n>& a) return Matx<_Tp, m, n>(a, alpha, Matx_ScaleOp()); } +template static inline +Matx<_Tp, m, n>& operator /= (Matx<_Tp, m, n>& a, float alpha) +{ + for( int i = 0; i < m*n; i++ ) + a.val[i] = a.val[i] / alpha; + return a; +} + +template static inline +Matx<_Tp, m, n>& operator /= (Matx<_Tp, m, n>& a, double alpha) +{ + for( int i = 0; i < m*n; i++ ) + a.val[i] = a.val[i] / alpha; + return a; +} + +template static inline +Matx<_Tp, m, n> operator / (const Matx<_Tp, m, n>& a, float alpha) +{ + return Matx<_Tp, m, n>(a, 1.f/alpha, Matx_ScaleOp()); +} + +template static inline +Matx<_Tp, m, n> operator / (const Matx<_Tp, m, n>& a, double alpha) +{ + return Matx<_Tp, m, n>(a, 1./alpha, Matx_ScaleOp()); +} + template static inline Matx<_Tp, m, n> operator - (const Matx<_Tp, m, n>& a) { @@ -1404,4 +1496,4 @@ template inline Vec<_Tp, 4>& operator *= (Vec<_Tp, 4>& v1, const V } // cv -#endif // __OPENCV_CORE_MATX_HPP__ +#endif // OPENCV_CORE_MATX_HPP diff --git a/IPL/include/opencv/opencv2/core/neon_utils.hpp b/IPL/include/opencv/opencv2/core/neon_utils.hpp index adb750f..573ba99 100644 --- a/IPL/include/opencv/opencv2/core/neon_utils.hpp +++ b/IPL/include/opencv/opencv2/core/neon_utils.hpp @@ -39,8 +39,8 @@ // //M*/ -#ifndef __OPENCV_HAL_NEON_UTILS_HPP__ -#define __OPENCV_HAL_NEON_UTILS_HPP__ +#ifndef OPENCV_HAL_NEON_UTILS_HPP +#define OPENCV_HAL_NEON_UTILS_HPP #include "opencv2/core/cvdef.h" @@ -125,4 +125,4 @@ inline float32x2_t cv_vsqrt_f32(float32x2_t val) //! @} -#endif // __OPENCV_HAL_NEON_UTILS_HPP__ +#endif // OPENCV_HAL_NEON_UTILS_HPP diff --git a/IPL/include/opencv/opencv2/core/ocl.hpp b/IPL/include/opencv/opencv2/core/ocl.hpp index bc989a3..115f5d1 100644 --- a/IPL/include/opencv/opencv2/core/ocl.hpp +++ b/IPL/include/opencv/opencv2/core/ocl.hpp @@ -39,8 +39,8 @@ // //M*/ -#ifndef __OPENCV_OPENCL_HPP__ -#define __OPENCV_OPENCL_HPP__ +#ifndef OPENCV_OPENCL_HPP +#define OPENCV_OPENCL_HPP #include "opencv2/core.hpp" @@ -59,7 +59,7 @@ CV_EXPORTS_W void finish(); CV_EXPORTS bool haveSVM(); class CV_EXPORTS Context; -class CV_EXPORTS Device; +class CV_EXPORTS_W_SIMPLE Device; class CV_EXPORTS Kernel; class CV_EXPORTS Program; class CV_EXPORTS ProgramSource; @@ -67,14 +67,14 @@ class CV_EXPORTS Queue; class CV_EXPORTS PlatformInfo; class CV_EXPORTS Image2D; -class CV_EXPORTS Device +class CV_EXPORTS_W_SIMPLE Device { public: - Device(); + CV_WRAP Device(); explicit Device(void* d); Device(const Device& d); Device& operator = (const Device& d); - ~Device(); + CV_WRAP ~Device(); void set(void* d); @@ -89,23 +89,24 @@ class CV_EXPORTS Device TYPE_ALL = 0xFFFFFFFF }; - String name() const; - String extensions() const; - String version() const; - String vendorName() const; - String OpenCL_C_Version() const; - String OpenCLVersion() const; - int deviceVersionMajor() const; - int deviceVersionMinor() const; - String driverVersion() const; + CV_WRAP String name() const; + CV_WRAP String extensions() const; + CV_WRAP bool isExtensionSupported(const String& extensionName) const; + CV_WRAP String version() const; + CV_WRAP String vendorName() const; + CV_WRAP String OpenCL_C_Version() const; + CV_WRAP String OpenCLVersion() const; + CV_WRAP int deviceVersionMajor() const; + CV_WRAP int deviceVersionMinor() const; + CV_WRAP String driverVersion() const; void* ptr() const; - int type() const; + CV_WRAP int type() const; - int addressBits() const; - bool available() const; - bool compilerAvailable() const; - bool linkerAvailable() const; + CV_WRAP int addressBits() const; + CV_WRAP bool available() const; + CV_WRAP bool compilerAvailable() const; + CV_WRAP bool linkerAvailable() const; enum { @@ -118,21 +119,21 @@ class CV_EXPORTS Device FP_SOFT_FLOAT=(1 << 6), FP_CORRECTLY_ROUNDED_DIVIDE_SQRT=(1 << 7) }; - int doubleFPConfig() const; - int singleFPConfig() const; - int halfFPConfig() const; + CV_WRAP int doubleFPConfig() const; + CV_WRAP int singleFPConfig() const; + CV_WRAP int halfFPConfig() const; - bool endianLittle() const; - bool errorCorrectionSupport() const; + CV_WRAP bool endianLittle() const; + CV_WRAP bool errorCorrectionSupport() const; enum { EXEC_KERNEL=(1 << 0), EXEC_NATIVE_KERNEL=(1 << 1) }; - int executionCapabilities() const; + CV_WRAP int executionCapabilities() const; - size_t globalMemCacheSize() const; + CV_WRAP size_t globalMemCacheSize() const; enum { @@ -140,35 +141,38 @@ class CV_EXPORTS Device READ_ONLY_CACHE=1, READ_WRITE_CACHE=2 }; - int globalMemCacheType() const; - int globalMemCacheLineSize() const; - size_t globalMemSize() const; + CV_WRAP int globalMemCacheType() const; + CV_WRAP int globalMemCacheLineSize() const; + CV_WRAP size_t globalMemSize() const; - size_t localMemSize() const; + CV_WRAP size_t localMemSize() const; enum { NO_LOCAL_MEM=0, LOCAL_IS_LOCAL=1, LOCAL_IS_GLOBAL=2 }; - int localMemType() const; - bool hostUnifiedMemory() const; + CV_WRAP int localMemType() const; + CV_WRAP bool hostUnifiedMemory() const; - bool imageSupport() const; + CV_WRAP bool imageSupport() const; - bool imageFromBufferSupport() const; + CV_WRAP bool imageFromBufferSupport() const; uint imagePitchAlignment() const; uint imageBaseAddressAlignment() const; - size_t image2DMaxWidth() const; - size_t image2DMaxHeight() const; + /// deprecated, use isExtensionSupported() method (probably with "cl_khr_subgroups" value) + CV_WRAP bool intelSubgroupsSupport() const; + + CV_WRAP size_t image2DMaxWidth() const; + CV_WRAP size_t image2DMaxHeight() const; - size_t image3DMaxWidth() const; - size_t image3DMaxHeight() const; - size_t image3DMaxDepth() const; + CV_WRAP size_t image3DMaxWidth() const; + CV_WRAP size_t image3DMaxHeight() const; + CV_WRAP size_t image3DMaxDepth() const; - size_t imageMaxBufferSize() const; - size_t imageMaxArraySize() const; + CV_WRAP size_t imageMaxBufferSize() const; + CV_WRAP size_t imageMaxArraySize() const; enum { @@ -177,53 +181,53 @@ class CV_EXPORTS Device VENDOR_INTEL=2, VENDOR_NVIDIA=3 }; - int vendorID() const; + CV_WRAP int vendorID() const; // FIXIT // dev.isAMD() doesn't work for OpenCL CPU devices from AMD OpenCL platform. // This method should use platform name instead of vendor name. // After fix restore code in arithm.cpp: ocl_compare() - inline bool isAMD() const { return vendorID() == VENDOR_AMD; } - inline bool isIntel() const { return vendorID() == VENDOR_INTEL; } - inline bool isNVidia() const { return vendorID() == VENDOR_NVIDIA; } + CV_WRAP inline bool isAMD() const { return vendorID() == VENDOR_AMD; } + CV_WRAP inline bool isIntel() const { return vendorID() == VENDOR_INTEL; } + CV_WRAP inline bool isNVidia() const { return vendorID() == VENDOR_NVIDIA; } - int maxClockFrequency() const; - int maxComputeUnits() const; - int maxConstantArgs() const; - size_t maxConstantBufferSize() const; + CV_WRAP int maxClockFrequency() const; + CV_WRAP int maxComputeUnits() const; + CV_WRAP int maxConstantArgs() const; + CV_WRAP size_t maxConstantBufferSize() const; - size_t maxMemAllocSize() const; - size_t maxParameterSize() const; + CV_WRAP size_t maxMemAllocSize() const; + CV_WRAP size_t maxParameterSize() const; - int maxReadImageArgs() const; - int maxWriteImageArgs() const; - int maxSamplers() const; + CV_WRAP int maxReadImageArgs() const; + CV_WRAP int maxWriteImageArgs() const; + CV_WRAP int maxSamplers() const; - size_t maxWorkGroupSize() const; - int maxWorkItemDims() const; + CV_WRAP size_t maxWorkGroupSize() const; + CV_WRAP int maxWorkItemDims() const; void maxWorkItemSizes(size_t*) const; - int memBaseAddrAlign() const; + CV_WRAP int memBaseAddrAlign() const; - int nativeVectorWidthChar() const; - int nativeVectorWidthShort() const; - int nativeVectorWidthInt() const; - int nativeVectorWidthLong() const; - int nativeVectorWidthFloat() const; - int nativeVectorWidthDouble() const; - int nativeVectorWidthHalf() const; + CV_WRAP int nativeVectorWidthChar() const; + CV_WRAP int nativeVectorWidthShort() const; + CV_WRAP int nativeVectorWidthInt() const; + CV_WRAP int nativeVectorWidthLong() const; + CV_WRAP int nativeVectorWidthFloat() const; + CV_WRAP int nativeVectorWidthDouble() const; + CV_WRAP int nativeVectorWidthHalf() const; - int preferredVectorWidthChar() const; - int preferredVectorWidthShort() const; - int preferredVectorWidthInt() const; - int preferredVectorWidthLong() const; - int preferredVectorWidthFloat() const; - int preferredVectorWidthDouble() const; - int preferredVectorWidthHalf() const; + CV_WRAP int preferredVectorWidthChar() const; + CV_WRAP int preferredVectorWidthShort() const; + CV_WRAP int preferredVectorWidthInt() const; + CV_WRAP int preferredVectorWidthLong() const; + CV_WRAP int preferredVectorWidthFloat() const; + CV_WRAP int preferredVectorWidthDouble() const; + CV_WRAP int preferredVectorWidthHalf() const; - size_t printfBufferSize() const; - size_t profilingTimerResolution() const; + CV_WRAP size_t printfBufferSize() const; + CV_WRAP size_t profilingTimerResolution() const; - static const Device& getDefault(); + CV_WRAP static const Device& getDefault(); protected: struct Impl; @@ -246,6 +250,7 @@ class CV_EXPORTS Context const Device& device(size_t idx) const; Program getProg(const ProgramSource& prog, const String& buildopt, String& errmsg); + void unloadProg(Program& prog); static Context& getDefault(bool initialize = true); void* ptr() const; @@ -256,6 +261,8 @@ class CV_EXPORTS Context void setUseSVM(bool enabled); struct Impl; + inline Impl* getImpl() const { return (Impl*)p; } +//protected: Impl* p; }; @@ -276,55 +283,38 @@ class CV_EXPORTS Platform Impl* p; }; -/* -//! @brief Attaches OpenCL context to OpenCV -// -//! @note Note: -// OpenCV will check if available OpenCL platform has platformName name, -// then assign context to OpenCV and call clRetainContext function. -// The deviceID device will be used as target device and new command queue -// will be created. -// -// Params: -//! @param platformName - name of OpenCL platform to attach, -//! this string is used to check if platform is available -//! to OpenCV at runtime -//! @param platfromID - ID of platform attached context was created for -//! @param context - OpenCL context to be attached to OpenCV -//! @param deviceID - ID of device, must be created from attached context +/** @brief Attaches OpenCL context to OpenCV +@note + OpenCV will check if available OpenCL platform has platformName name, then assign context to + OpenCV and call `clRetainContext` function. The deviceID device will be used as target device and + new command queue will be created. +@param platformName name of OpenCL platform to attach, this string is used to check if platform is available to OpenCV at runtime +@param platformID ID of platform attached context was created for +@param context OpenCL context to be attached to OpenCV +@param deviceID ID of device, must be created from attached context */ CV_EXPORTS void attachContext(const String& platformName, void* platformID, void* context, void* deviceID); -/* -//! @brief Convert OpenCL buffer to UMat -// -//! @note Note: -// OpenCL buffer (cl_mem_buffer) should contain 2D image data, compatible with OpenCV. -// Memory content is not copied from clBuffer to UMat. Instead, buffer handle assigned -// to UMat and clRetainMemObject is called. -// -// Params: -//! @param cl_mem_buffer - source clBuffer handle -//! @param step - num of bytes in single row -//! @param rows - number of rows -//! @param cols - number of cols -//! @param type - OpenCV type of image -//! @param dst - destination UMat +/** @brief Convert OpenCL buffer to UMat +@note + OpenCL buffer (cl_mem_buffer) should contain 2D image data, compatible with OpenCV. Memory + content is not copied from `clBuffer` to UMat. Instead, buffer handle assigned to UMat and + `clRetainMemObject` is called. +@param cl_mem_buffer source clBuffer handle +@param step num of bytes in single row +@param rows number of rows +@param cols number of cols +@param type OpenCV type of image +@param dst destination UMat */ CV_EXPORTS void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst); -/* -//! @brief Convert OpenCL image2d_t to UMat -// -//! @note Note: -// OpenCL image2d_t (cl_mem_image), should be compatible with OpenCV -// UMat formats. -// Memory content is copied from image to UMat with -// clEnqueueCopyImageToBuffer function. -// -// Params: -//! @param cl_mem_image - source image2d_t handle -//! @param dst - destination UMat +/** @brief Convert OpenCL image2d_t to UMat +@note + OpenCL `image2d_t` (cl_mem_image), should be compatible with OpenCV UMat formats. Memory content + is copied from image to UMat with `clEnqueueCopyImageToBuffer` function. +@param cl_mem_image source image2d_t handle +@param dst destination UMat */ CV_EXPORTS void convertFromImage(void* cl_mem_image, UMat& dst); @@ -345,8 +335,12 @@ class CV_EXPORTS Queue void* ptr() const; static Queue& getDefault(); + /// @brief Returns OpenCL command queue with enable profiling mode support + const Queue& getProfilingQueue() const; + + struct Impl; friend struct Impl; + inline Impl* getImpl() const { return p; } protected: - struct Impl; Impl* p; }; @@ -358,7 +352,8 @@ class CV_EXPORTS KernelArg KernelArg(int _flags, UMat* _m, int wscale=1, int iwscale=1, const void* _obj=0, size_t _sz=0); KernelArg(); - static KernelArg Local() { return KernelArg(LOCAL, 0); } + static KernelArg Local(size_t localMemSize) + { return KernelArg(LOCAL, 0, 1, 1, 0, localMemSize); } static KernelArg PtrWriteOnly(const UMat& m) { return KernelArg(PTR_ONLY+WRITE_ONLY, (UMat*)&m); } static KernelArg PtrReadOnly(const UMat& m) @@ -412,166 +407,48 @@ class CV_EXPORTS Kernel template int set(int i, const _Tp& value) { return set(i, &value, sizeof(value)); } - template - Kernel& args(const _Tp0& a0) - { - set(0, a0); return *this; - } - - template - Kernel& args(const _Tp0& a0, const _Tp1& a1) - { - int i = set(0, a0); set(i, a1); return *this; - } - - template - Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2) - { - int i = set(0, a0); i = set(i, a1); set(i, a2); return *this; - } - - template - Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3) - { - int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); return *this; - } - - template - Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, - const _Tp3& a3, const _Tp4& a4) - { - int i = set(0, a0); i = set(i, a1); i = set(i, a2); - i = set(i, a3); set(i, a4); return *this; - } - - template - Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, - const _Tp3& a3, const _Tp4& a4, const _Tp5& a5) - { - int i = set(0, a0); i = set(i, a1); i = set(i, a2); - i = set(i, a3); i = set(i, a4); set(i, a5); return *this; - } - - template - Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3, - const _Tp4& a4, const _Tp5& a5, const _Tp6& a6) - { - int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); - i = set(i, a4); i = set(i, a5); set(i, a6); return *this; - } - - template - Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3, - const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7) - { - int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); - i = set(i, a4); i = set(i, a5); i = set(i, a6); set(i, a7); return *this; - } - - template - Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3, - const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7, - const _Tp8& a8) - { - int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); - i = set(i, a5); i = set(i, a6); i = set(i, a7); set(i, a8); return *this; - } - - template - Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3, - const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7, - const _Tp8& a8, const _Tp9& a9) - { - int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5); - i = set(i, a6); i = set(i, a7); i = set(i, a8); set(i, a9); return *this; - } - - template - Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3, - const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7, - const _Tp8& a8, const _Tp9& a9, const _Tp10& a10) - { - int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5); - i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); set(i, a10); return *this; - } - - template - Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3, - const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7, - const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11) - { - int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5); - i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); set(i, a11); return *this; - } - - template - Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3, - const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7, - const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11, - const _Tp12& a12) - { - int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5); - i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11); - set(i, a12); return *this; - } - - template - Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3, - const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7, - const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11, - const _Tp12& a12, const _Tp13& a13) - { - int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5); - i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11); - i = set(i, a12); set(i, a13); return *this; - } - - template - Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3, - const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7, - const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11, - const _Tp12& a12, const _Tp13& a13, const _Tp14& a14) - { - int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5); - i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11); - i = set(i, a12); i = set(i, a13); set(i, a14); return *this; - } - - template - Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3, - const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7, - const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11, - const _Tp12& a12, const _Tp13& a13, const _Tp14& a14, const _Tp15& a15) - { - int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5); - i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11); - i = set(i, a12); i = set(i, a13); i = set(i, a14); set(i, a15); return *this; - } +protected: + template inline + int set_args_(int i, const _Tp0& a0) { return set(i, a0); } + template inline + int set_args_(int i, const _Tp0& a0, const _Tps&... rest_args) { i = set(i, a0); return set_args_(i, rest_args...); } +public: + /** @brief Setup OpenCL Kernel arguments. + Avoid direct using of set(i, ...) methods. + @code + bool ok = kernel + .args( + srcUMat, dstUMat, + (float)some_float_param + ).run(ndims, globalSize, localSize); + if (!ok) return false; + @endcode + */ + template inline + Kernel& args(const _Tps&... kernel_args) { set_args_(0, kernel_args...); return *this; } + + + /** @brief Run the OpenCL kernel. + @param dims the work problem dimensions. It is the length of globalsize and localsize. It can be either 1, 2 or 3. + @param globalsize work items for each dimension. It is not the final globalsize passed to + OpenCL. Each dimension will be adjusted to the nearest integer divisible by the corresponding + value in localsize. If localsize is NULL, it will still be adjusted depending on dims. The + adjusted values are greater than or equal to the original values. + @param localsize work-group size for each dimension. + @param sync specify whether to wait for OpenCL computation to finish before return. + @param q command queue + */ bool run(int dims, size_t globalsize[], size_t localsize[], bool sync, const Queue& q=Queue()); bool runTask(bool sync, const Queue& q=Queue()); + /** @brief Similar to synchronized run() call with returning of kernel execution time + * Separate OpenCL command queue may be used (with CL_QUEUE_PROFILING_ENABLE) + * @return Execution time in nanoseconds or negative number on error + */ + int64 runProfiling(int dims, size_t globalsize[], size_t localsize[], const Queue& q=Queue()); + size_t workGroupSize() const; size_t preferedWorkGroupSizeMultiple() const; bool compileWorkGroupSize(size_t wsz[]) const; @@ -590,7 +467,6 @@ class CV_EXPORTS Program Program(); Program(const ProgramSource& src, const String& buildflags, String& errmsg); - explicit Program(const String& buf); Program(const Program& prog); Program& operator = (const Program& prog); @@ -598,38 +474,104 @@ class CV_EXPORTS Program bool create(const ProgramSource& src, const String& buildflags, String& errmsg); - bool read(const String& buf, const String& buildflags); - bool write(String& buf) const; - const ProgramSource& source() const; void* ptr() const; - String getPrefix() const; - static String getPrefix(const String& buildflags); - + /** + * @brief Query device-specific program binary. + * + * Returns RAW OpenCL executable binary without additional attachments. + * + * @sa ProgramSource::fromBinary + * + * @param[out] binary output buffer + */ + void getBinary(std::vector& binary) const; + + struct Impl; friend struct Impl; + inline Impl* getImpl() const { return (Impl*)p; } protected: - struct Impl; Impl* p; +public: +#ifndef OPENCV_REMOVE_DEPRECATED_API + // TODO Remove this + CV_DEPRECATED bool read(const String& buf, const String& buildflags); // removed, use ProgramSource instead + CV_DEPRECATED bool write(String& buf) const; // removed, use getBinary() method instead (RAW OpenCL binary) + CV_DEPRECATED const ProgramSource& source() const; // implementation removed + CV_DEPRECATED String getPrefix() const; // deprecated, implementation replaced + CV_DEPRECATED static String getPrefix(const String& buildflags); // deprecated, implementation replaced +#endif }; class CV_EXPORTS ProgramSource { public: - typedef uint64 hash_t; + typedef uint64 hash_t; // deprecated ProgramSource(); - explicit ProgramSource(const String& prog); - explicit ProgramSource(const char* prog); + explicit ProgramSource(const String& module, const String& name, const String& codeStr, const String& codeHash); + explicit ProgramSource(const String& prog); // deprecated + explicit ProgramSource(const char* prog); // deprecated ~ProgramSource(); ProgramSource(const ProgramSource& prog); ProgramSource& operator = (const ProgramSource& prog); - const String& source() const; - hash_t hash() const; - + const String& source() const; // deprecated + hash_t hash() const; // deprecated + + + /** @brief Describe OpenCL program binary. + * Do not call clCreateProgramWithBinary() and/or clBuildProgram(). + * + * Caller should guarantee binary buffer lifetime greater than ProgramSource object (and any of its copies). + * + * This kind of binary is not portable between platforms in general - it is specific to OpenCL vendor / device / driver version. + * + * @param module name of program owner module + * @param name unique name of program (module+name is used as key for OpenCL program caching) + * @param binary buffer address. See buffer lifetime requirement in description. + * @param size buffer size + * @param buildOptions additional program-related build options passed to clBuildProgram() + * @return created ProgramSource object + */ + static ProgramSource fromBinary(const String& module, const String& name, + const unsigned char* binary, const size_t size, + const cv::String& buildOptions = cv::String()); + + /** @brief Describe OpenCL program in SPIR format. + * Do not call clCreateProgramWithBinary() and/or clBuildProgram(). + * + * Supports SPIR 1.2 by default (pass '-spir-std=X.Y' in buildOptions to override this behavior) + * + * Caller should guarantee binary buffer lifetime greater than ProgramSource object (and any of its copies). + * + * Programs in this format are portable between OpenCL implementations with 'khr_spir' extension: + * https://www.khronos.org/registry/OpenCL/sdk/2.0/docs/man/xhtml/cl_khr_spir.html + * (but they are not portable between different platforms: 32-bit / 64-bit) + * + * Note: these programs can't support vendor specific extensions, like 'cl_intel_subgroups'. + * + * @param module name of program owner module + * @param name unique name of program (module+name is used as key for OpenCL program caching) + * @param binary buffer address. See buffer lifetime requirement in description. + * @param size buffer size + * @param buildOptions additional program-related build options passed to clBuildProgram() + * (these options are added automatically: '-x spir' and '-spir-std=1.2') + * @return created ProgramSource object. + */ + static ProgramSource fromSPIR(const String& module, const String& name, + const unsigned char* binary, const size_t size, + const cv::String& buildOptions = cv::String()); + + //OpenCL 2.1+ only + //static Program fromSPIRV(const String& module, const String& name, + // const unsigned char* binary, const size_t size, + // const cv::String& buildOptions = cv::String()); + + struct Impl; friend struct Impl; + inline Impl* getImpl() const { return (Impl*)p; } protected: - struct Impl; Impl* p; }; @@ -658,6 +600,7 @@ CV_EXPORTS const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf) CV_EXPORTS const char* typeToStr(int t); CV_EXPORTS const char* memopTypeToStr(int t); CV_EXPORTS const char* vecopTypeToStr(int t); +CV_EXPORTS const char* getOpenCLErrorString(int errorCode); CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth = -1, const char * name = NULL); CV_EXPORTS void getPlatfomsInfo(std::vector& platform_info); @@ -697,22 +640,25 @@ class CV_EXPORTS Image2D public: Image2D(); - // src: The UMat from which to get image properties and data - // norm: Flag to enable the use of normalized channel data types - // alias: Flag indicating that the image should alias the src UMat. - // If true, changes to the image or src will be reflected in - // both objects. + /** + @param src UMat object from which to get image properties and data + @param norm flag to enable the use of normalized channel data types + @param alias flag indicating that the image should alias the src UMat. If true, changes to the + image or src will be reflected in both objects. + */ explicit Image2D(const UMat &src, bool norm = false, bool alias = false); Image2D(const Image2D & i); ~Image2D(); Image2D & operator = (const Image2D & i); - // Indicates if creating an aliased image should succeed. Depends on the - // underlying platform and the dimensions of the UMat. + /** Indicates if creating an aliased image should succeed. + Depends on the underlying platform and the dimensions of the UMat. + */ static bool canCreateAlias(const UMat &u); - // Indicates if the image format is supported. + /** Indicates if the image format is supported. + */ static bool isFormatSupported(int depth, int cn, bool norm); void* ptr() const; @@ -721,6 +667,24 @@ class CV_EXPORTS Image2D Impl* p; }; +class CV_EXPORTS Timer +{ +public: + Timer(const Queue& q); + ~Timer(); + void start(); + void stop(); + + uint64 durationNS() const; //< duration in nanoseconds + +protected: + struct Impl; + Impl* const p; + +private: + Timer(const Timer&); // disabled + Timer& operator=(const Timer&); // disabled +}; CV_EXPORTS MatAllocator* getOpenCLAllocator(); @@ -728,6 +692,9 @@ CV_EXPORTS MatAllocator* getOpenCLAllocator(); #ifdef __OPENCV_BUILD namespace internal { +CV_EXPORTS bool isOpenCLForced(); +#define OCL_FORCE_CHECK(condition) (cv::ocl::internal::isOpenCLForced() || (condition)) + CV_EXPORTS bool isPerformanceCheckBypassed(); #define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::internal::isPerformanceCheckBypassed() || (condition)) diff --git a/IPL/include/opencv/opencv2/core/ocl_genbase.hpp b/IPL/include/opencv/opencv2/core/ocl_genbase.hpp index d53bc1a..5334cf1 100644 --- a/IPL/include/opencv/opencv2/core/ocl_genbase.hpp +++ b/IPL/include/opencv/opencv2/core/ocl_genbase.hpp @@ -39,26 +39,31 @@ // //M*/ -#ifndef __OPENCV_OPENCL_GENBASE_HPP__ -#define __OPENCV_OPENCL_GENBASE_HPP__ - -namespace cv -{ -namespace ocl -{ +#ifndef OPENCV_OPENCL_GENBASE_HPP +#define OPENCV_OPENCL_GENBASE_HPP //! @cond IGNORED -struct ProgramEntry +namespace cv { +namespace ocl { + +class ProgramSource; + +namespace internal { + +struct CV_EXPORTS ProgramEntry { + const char* module; const char* name; - const char* programStr; + const char* programCode; const char* programHash; + ProgramSource* pProgramSource; + + operator ProgramSource& () const; }; -//! @endcond +} } } // namespace -} -} +//! @endcond #endif diff --git a/IPL/include/opencv/opencv2/core/opencl/ocl_defs.hpp b/IPL/include/opencv/opencv2/core/opencl/ocl_defs.hpp new file mode 100644 index 0000000..14df750 --- /dev/null +++ b/IPL/include/opencv/opencv2/core/opencl/ocl_defs.hpp @@ -0,0 +1,82 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. + +#ifndef OPENCV_CORE_OPENCL_DEFS_HPP +#define OPENCV_CORE_OPENCL_DEFS_HPP + +#include "opencv2/core/utility.hpp" +#include "cvconfig.h" + +namespace cv { namespace ocl { +#ifdef HAVE_OPENCL +/// Call is similar to useOpenCL() but doesn't try to load OpenCL runtime or create OpenCL context +CV_EXPORTS bool isOpenCLActivated(); +#else +static inline bool isOpenCLActivated() { return false; } +#endif +}} // namespace + + +//#define CV_OPENCL_RUN_ASSERT + +#ifdef HAVE_OPENCL + +#ifdef CV_OPENCL_RUN_VERBOSE +#define CV_OCL_RUN_(condition, func, ...) \ + { \ + if (cv::ocl::isOpenCLActivated() && (condition) && func) \ + { \ + printf("%s: OpenCL implementation is running\n", CV_Func); \ + fflush(stdout); \ + CV_IMPL_ADD(CV_IMPL_OCL); \ + return __VA_ARGS__; \ + } \ + else \ + { \ + printf("%s: Plain implementation is running\n", CV_Func); \ + fflush(stdout); \ + } \ + } +#elif defined CV_OPENCL_RUN_ASSERT +#define CV_OCL_RUN_(condition, func, ...) \ + { \ + if (cv::ocl::isOpenCLActivated() && (condition)) \ + { \ + if(func) \ + { \ + CV_IMPL_ADD(CV_IMPL_OCL); \ + } \ + else \ + { \ + CV_Error(cv::Error::StsAssert, #func); \ + } \ + return __VA_ARGS__; \ + } \ + } +#else +#define CV_OCL_RUN_(condition, func, ...) \ +try \ +{ \ + if (cv::ocl::isOpenCLActivated() && (condition) && func) \ + { \ + CV_IMPL_ADD(CV_IMPL_OCL); \ + return __VA_ARGS__; \ + } \ +} \ +catch (const cv::Exception& e) \ +{ \ + CV_UNUSED(e); /* TODO: Add some logging here */ \ +} +#endif + +#else +#define CV_OCL_RUN_(condition, func, ...) +#endif + +#define CV_OCL_RUN(condition, func) CV_OCL_RUN_(condition, func) + +#endif // OPENCV_CORE_OPENCL_DEFS_HPP diff --git a/IPL/include/opencv/opencv2/core/opencl/opencl_info.hpp b/IPL/include/opencv/opencv2/core/opencl/opencl_info.hpp new file mode 100644 index 0000000..5e5c846 --- /dev/null +++ b/IPL/include/opencv/opencv2/core/opencl/opencl_info.hpp @@ -0,0 +1,205 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include + +#include +#include + +#ifndef DUMP_CONFIG_PROPERTY +#define DUMP_CONFIG_PROPERTY(...) +#endif + +#ifndef DUMP_MESSAGE_STDOUT +#define DUMP_MESSAGE_STDOUT(...) do { std::cout << __VA_ARGS__ << std::endl; } while (false) +#endif + +namespace cv { + +namespace { +static std::string bytesToStringRepr(size_t value) +{ + size_t b = value % 1024; + value /= 1024; + + size_t kb = value % 1024; + value /= 1024; + + size_t mb = value % 1024; + value /= 1024; + + size_t gb = value; + + std::ostringstream stream; + + if (gb > 0) + stream << gb << " GB "; + if (mb > 0) + stream << mb << " MB "; + if (kb > 0) + stream << kb << " KB "; + if (b > 0) + stream << b << " B"; + + std::string s = stream.str(); + if (s[s.size() - 1] == ' ') + s = s.substr(0, s.size() - 1); + return s; +} + +static String getDeviceTypeString(const cv::ocl::Device& device) +{ + if (device.type() == cv::ocl::Device::TYPE_CPU) { + return "CPU"; + } + + if (device.type() == cv::ocl::Device::TYPE_GPU) { + if (device.hostUnifiedMemory()) { + return "iGPU"; + } else { + return "dGPU"; + } + } + + return "unknown"; +} +} // namespace + +static void dumpOpenCLInformation() +{ + using namespace cv::ocl; + + try + { + if (!haveOpenCL() || !useOpenCL()) + { + DUMP_MESSAGE_STDOUT("OpenCL is disabled"); + DUMP_CONFIG_PROPERTY("cv_ocl", "disabled"); + return; + } + + std::vector platforms; + cv::ocl::getPlatfomsInfo(platforms); + if (platforms.empty()) + { + DUMP_MESSAGE_STDOUT("OpenCL is not available"); + DUMP_CONFIG_PROPERTY("cv_ocl", "not available"); + return; + } + + DUMP_MESSAGE_STDOUT("OpenCL Platforms: "); + for (size_t i = 0; i < platforms.size(); i++) + { + const PlatformInfo* platform = &platforms[i]; + DUMP_MESSAGE_STDOUT(" " << platform->name()); + Device current_device; + for (int j = 0; j < platform->deviceNumber(); j++) + { + platform->getDevice(current_device, j); + String deviceTypeStr = getDeviceTypeString(current_device); + DUMP_MESSAGE_STDOUT( " " << deviceTypeStr << ": " << current_device.name() << " (" << current_device.version() << ")"); + DUMP_CONFIG_PROPERTY( cv::format("cv_ocl_platform_%d_device_%d", (int)i, j ), + cv::format("(Platform=%s)(Type=%s)(Name=%s)(Version=%s)", + platform->name().c_str(), deviceTypeStr.c_str(), current_device.name().c_str(), current_device.version().c_str()) ); + } + } + const Device& device = Device::getDefault(); + if (!device.available()) + CV_Error(Error::OpenCLInitError, "OpenCL device is not available"); + + DUMP_MESSAGE_STDOUT("Current OpenCL device: "); + + String deviceTypeStr = getDeviceTypeString(device); + DUMP_MESSAGE_STDOUT(" Type = " << deviceTypeStr); + DUMP_CONFIG_PROPERTY("cv_ocl_current_deviceType", deviceTypeStr); + + DUMP_MESSAGE_STDOUT(" Name = " << device.name()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_deviceName", device.name()); + + DUMP_MESSAGE_STDOUT(" Version = " << device.version()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_deviceVersion", device.version()); + + DUMP_MESSAGE_STDOUT(" Driver version = " << device.driverVersion()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_driverVersion", device.driverVersion()); + + DUMP_MESSAGE_STDOUT(" Address bits = " << device.addressBits()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_addressBits", device.addressBits()); + + DUMP_MESSAGE_STDOUT(" Compute units = " << device.maxComputeUnits()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_maxComputeUnits", device.maxComputeUnits()); + + DUMP_MESSAGE_STDOUT(" Max work group size = " << device.maxWorkGroupSize()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_maxWorkGroupSize", device.maxWorkGroupSize()); + + std::string localMemorySizeStr = bytesToStringRepr(device.localMemSize()); + DUMP_MESSAGE_STDOUT(" Local memory size = " << localMemorySizeStr); + DUMP_CONFIG_PROPERTY("cv_ocl_current_localMemSize", device.localMemSize()); + + std::string maxMemAllocSizeStr = bytesToStringRepr(device.maxMemAllocSize()); + DUMP_MESSAGE_STDOUT(" Max memory allocation size = " << maxMemAllocSizeStr); + DUMP_CONFIG_PROPERTY("cv_ocl_current_maxMemAllocSize", device.maxMemAllocSize()); + + const char* doubleSupportStr = device.doubleFPConfig() > 0 ? "Yes" : "No"; + DUMP_MESSAGE_STDOUT(" Double support = " << doubleSupportStr); + DUMP_CONFIG_PROPERTY("cv_ocl_current_haveDoubleSupport", device.doubleFPConfig() > 0); + + const char* isUnifiedMemoryStr = device.hostUnifiedMemory() ? "Yes" : "No"; + DUMP_MESSAGE_STDOUT(" Host unified memory = " << isUnifiedMemoryStr); + DUMP_CONFIG_PROPERTY("cv_ocl_current_hostUnifiedMemory", device.hostUnifiedMemory()); + + DUMP_MESSAGE_STDOUT(" Device extensions:"); + String extensionsStr = device.extensions(); + size_t pos = 0; + while (pos < extensionsStr.size()) + { + size_t pos2 = extensionsStr.find(' ', pos); + if (pos2 == String::npos) + pos2 = extensionsStr.size(); + if (pos2 > pos) + { + String extensionName = extensionsStr.substr(pos, pos2 - pos); + DUMP_MESSAGE_STDOUT(" " << extensionName); + } + pos = pos2 + 1; + } + DUMP_CONFIG_PROPERTY("cv_ocl_current_extensions", extensionsStr); + + const char* haveAmdBlasStr = haveAmdBlas() ? "Yes" : "No"; + DUMP_MESSAGE_STDOUT(" Has AMD Blas = " << haveAmdBlasStr); + DUMP_CONFIG_PROPERTY("cv_ocl_current_AmdBlas", haveAmdBlas()); + + const char* haveAmdFftStr = haveAmdFft() ? "Yes" : "No"; + DUMP_MESSAGE_STDOUT(" Has AMD Fft = " << haveAmdFftStr); + DUMP_CONFIG_PROPERTY("cv_ocl_current_AmdFft", haveAmdFft()); + + + DUMP_MESSAGE_STDOUT(" Preferred vector width char = " << device.preferredVectorWidthChar()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthChar", device.preferredVectorWidthChar()); + + DUMP_MESSAGE_STDOUT(" Preferred vector width short = " << device.preferredVectorWidthShort()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthShort", device.preferredVectorWidthShort()); + + DUMP_MESSAGE_STDOUT(" Preferred vector width int = " << device.preferredVectorWidthInt()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthInt", device.preferredVectorWidthInt()); + + DUMP_MESSAGE_STDOUT(" Preferred vector width long = " << device.preferredVectorWidthLong()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthLong", device.preferredVectorWidthLong()); + + DUMP_MESSAGE_STDOUT(" Preferred vector width float = " << device.preferredVectorWidthFloat()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthFloat", device.preferredVectorWidthFloat()); + + DUMP_MESSAGE_STDOUT(" Preferred vector width double = " << device.preferredVectorWidthDouble()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthDouble", device.preferredVectorWidthDouble()); + } + catch (...) + { + DUMP_MESSAGE_STDOUT("Exception. Can't dump OpenCL info"); + DUMP_MESSAGE_STDOUT("OpenCL device not available"); + DUMP_CONFIG_PROPERTY("cv_ocl", "not available"); + } +} +#undef DUMP_MESSAGE_STDOUT +#undef DUMP_CONFIG_PROPERTY + +} // namespace diff --git a/IPL/include/opencv/opencv2/core/opencl/opencl_svm.hpp b/IPL/include/opencv/opencv2/core/opencl/opencl_svm.hpp new file mode 100644 index 0000000..7453082 --- /dev/null +++ b/IPL/include/opencv/opencv2/core/opencl/opencl_svm.hpp @@ -0,0 +1,81 @@ +/* See LICENSE file in the root OpenCV directory */ + +#ifndef OPENCV_CORE_OPENCL_SVM_HPP +#define OPENCV_CORE_OPENCL_SVM_HPP + +// +// Internal usage only (binary compatibility is not guaranteed) +// +#ifndef __OPENCV_BUILD +#error Internal header file +#endif + +#if defined(HAVE_OPENCL) && defined(HAVE_OPENCL_SVM) +#include "runtime/opencl_core.hpp" +#include "runtime/opencl_svm_20.hpp" +#include "runtime/opencl_svm_hsa_extension.hpp" + +namespace cv { namespace ocl { namespace svm { + +struct SVMCapabilities +{ + enum Value + { + SVM_COARSE_GRAIN_BUFFER = (1 << 0), + SVM_FINE_GRAIN_BUFFER = (1 << 1), + SVM_FINE_GRAIN_SYSTEM = (1 << 2), + SVM_ATOMICS = (1 << 3), + }; + int value_; + + SVMCapabilities(int capabilities = 0) : value_(capabilities) { } + operator int() const { return value_; } + + inline bool isNoSVMSupport() const { return value_ == 0; } + inline bool isSupportCoarseGrainBuffer() const { return (value_ & SVM_COARSE_GRAIN_BUFFER) != 0; } + inline bool isSupportFineGrainBuffer() const { return (value_ & SVM_FINE_GRAIN_BUFFER) != 0; } + inline bool isSupportFineGrainSystem() const { return (value_ & SVM_FINE_GRAIN_SYSTEM) != 0; } + inline bool isSupportAtomics() const { return (value_ & SVM_ATOMICS) != 0; } +}; + +CV_EXPORTS const SVMCapabilities getSVMCapabilitites(const ocl::Context& context); + +struct SVMFunctions +{ + clSVMAllocAMD_fn fn_clSVMAlloc; + clSVMFreeAMD_fn fn_clSVMFree; + clSetKernelArgSVMPointerAMD_fn fn_clSetKernelArgSVMPointer; + //clSetKernelExecInfoAMD_fn fn_clSetKernelExecInfo; + //clEnqueueSVMFreeAMD_fn fn_clEnqueueSVMFree; + clEnqueueSVMMemcpyAMD_fn fn_clEnqueueSVMMemcpy; + clEnqueueSVMMemFillAMD_fn fn_clEnqueueSVMMemFill; + clEnqueueSVMMapAMD_fn fn_clEnqueueSVMMap; + clEnqueueSVMUnmapAMD_fn fn_clEnqueueSVMUnmap; + + inline SVMFunctions() + : fn_clSVMAlloc(NULL), fn_clSVMFree(NULL), + fn_clSetKernelArgSVMPointer(NULL), /*fn_clSetKernelExecInfo(NULL),*/ + /*fn_clEnqueueSVMFree(NULL),*/ fn_clEnqueueSVMMemcpy(NULL), fn_clEnqueueSVMMemFill(NULL), + fn_clEnqueueSVMMap(NULL), fn_clEnqueueSVMUnmap(NULL) + { + // nothing + } + + inline bool isValid() const + { + return fn_clSVMAlloc != NULL && fn_clSVMFree && fn_clSetKernelArgSVMPointer && + /*fn_clSetKernelExecInfo && fn_clEnqueueSVMFree &&*/ fn_clEnqueueSVMMemcpy && + fn_clEnqueueSVMMemFill && fn_clEnqueueSVMMap && fn_clEnqueueSVMUnmap; + } +}; + +// We should guarantee that SVMFunctions lifetime is not less than context's lifetime +CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context); + +CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags); + +}}} //namespace cv::ocl::svm +#endif + +#endif // OPENCV_CORE_OPENCL_SVM_HPP +/* End of file. */ diff --git a/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_clamdblas.hpp b/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_clamdblas.hpp new file mode 100644 index 0000000..65c8493 --- /dev/null +++ b/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_clamdblas.hpp @@ -0,0 +1,714 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#ifndef OPENCV_CORE_OCL_RUNTIME_CLAMDBLAS_HPP +#error "Invalid usage" +#endif + +// generated by parser_clamdblas.py +#define clAmdBlasAddScratchImage clAmdBlasAddScratchImage_ +#define clAmdBlasCaxpy clAmdBlasCaxpy_ +#define clAmdBlasCcopy clAmdBlasCcopy_ +#define clAmdBlasCdotc clAmdBlasCdotc_ +#define clAmdBlasCdotu clAmdBlasCdotu_ +#define clAmdBlasCgbmv clAmdBlasCgbmv_ +#define clAmdBlasCgemm clAmdBlasCgemm_ +#define clAmdBlasCgemmEx clAmdBlasCgemmEx_ +#define clAmdBlasCgemv clAmdBlasCgemv_ +#define clAmdBlasCgemvEx clAmdBlasCgemvEx_ +#define clAmdBlasCgerc clAmdBlasCgerc_ +#define clAmdBlasCgeru clAmdBlasCgeru_ +#define clAmdBlasChbmv clAmdBlasChbmv_ +#define clAmdBlasChemm clAmdBlasChemm_ +#define clAmdBlasChemv clAmdBlasChemv_ +#define clAmdBlasCher clAmdBlasCher_ +#define clAmdBlasCher2 clAmdBlasCher2_ +#define clAmdBlasCher2k clAmdBlasCher2k_ +#define clAmdBlasCherk clAmdBlasCherk_ +#define clAmdBlasChpmv clAmdBlasChpmv_ +#define clAmdBlasChpr clAmdBlasChpr_ +#define clAmdBlasChpr2 clAmdBlasChpr2_ +#define clAmdBlasCrotg clAmdBlasCrotg_ +#define clAmdBlasCscal clAmdBlasCscal_ +#define clAmdBlasCsrot clAmdBlasCsrot_ +#define clAmdBlasCsscal clAmdBlasCsscal_ +#define clAmdBlasCswap clAmdBlasCswap_ +#define clAmdBlasCsymm clAmdBlasCsymm_ +#define clAmdBlasCsyr2k clAmdBlasCsyr2k_ +#define clAmdBlasCsyr2kEx clAmdBlasCsyr2kEx_ +#define clAmdBlasCsyrk clAmdBlasCsyrk_ +#define clAmdBlasCsyrkEx clAmdBlasCsyrkEx_ +#define clAmdBlasCtbmv clAmdBlasCtbmv_ +#define clAmdBlasCtbsv clAmdBlasCtbsv_ +#define clAmdBlasCtpmv clAmdBlasCtpmv_ +#define clAmdBlasCtpsv clAmdBlasCtpsv_ +#define clAmdBlasCtrmm clAmdBlasCtrmm_ +#define clAmdBlasCtrmmEx clAmdBlasCtrmmEx_ +#define clAmdBlasCtrmv clAmdBlasCtrmv_ +#define clAmdBlasCtrsm clAmdBlasCtrsm_ +#define clAmdBlasCtrsmEx clAmdBlasCtrsmEx_ +#define clAmdBlasCtrsv clAmdBlasCtrsv_ +#define clAmdBlasDasum clAmdBlasDasum_ +#define clAmdBlasDaxpy clAmdBlasDaxpy_ +#define clAmdBlasDcopy clAmdBlasDcopy_ +#define clAmdBlasDdot clAmdBlasDdot_ +#define clAmdBlasDgbmv clAmdBlasDgbmv_ +#define clAmdBlasDgemm clAmdBlasDgemm_ +#define clAmdBlasDgemmEx clAmdBlasDgemmEx_ +#define clAmdBlasDgemv clAmdBlasDgemv_ +#define clAmdBlasDgemvEx clAmdBlasDgemvEx_ +#define clAmdBlasDger clAmdBlasDger_ +#define clAmdBlasDnrm2 clAmdBlasDnrm2_ +#define clAmdBlasDrot clAmdBlasDrot_ +#define clAmdBlasDrotg clAmdBlasDrotg_ +#define clAmdBlasDrotm clAmdBlasDrotm_ +#define clAmdBlasDrotmg clAmdBlasDrotmg_ +#define clAmdBlasDsbmv clAmdBlasDsbmv_ +#define clAmdBlasDscal clAmdBlasDscal_ +#define clAmdBlasDspmv clAmdBlasDspmv_ +#define clAmdBlasDspr clAmdBlasDspr_ +#define clAmdBlasDspr2 clAmdBlasDspr2_ +#define clAmdBlasDswap clAmdBlasDswap_ +#define clAmdBlasDsymm clAmdBlasDsymm_ +#define clAmdBlasDsymv clAmdBlasDsymv_ +#define clAmdBlasDsymvEx clAmdBlasDsymvEx_ +#define clAmdBlasDsyr clAmdBlasDsyr_ +#define clAmdBlasDsyr2 clAmdBlasDsyr2_ +#define clAmdBlasDsyr2k clAmdBlasDsyr2k_ +#define clAmdBlasDsyr2kEx clAmdBlasDsyr2kEx_ +#define clAmdBlasDsyrk clAmdBlasDsyrk_ +#define clAmdBlasDsyrkEx clAmdBlasDsyrkEx_ +#define clAmdBlasDtbmv clAmdBlasDtbmv_ +#define clAmdBlasDtbsv clAmdBlasDtbsv_ +#define clAmdBlasDtpmv clAmdBlasDtpmv_ +#define clAmdBlasDtpsv clAmdBlasDtpsv_ +#define clAmdBlasDtrmm clAmdBlasDtrmm_ +#define clAmdBlasDtrmmEx clAmdBlasDtrmmEx_ +#define clAmdBlasDtrmv clAmdBlasDtrmv_ +#define clAmdBlasDtrsm clAmdBlasDtrsm_ +#define clAmdBlasDtrsmEx clAmdBlasDtrsmEx_ +#define clAmdBlasDtrsv clAmdBlasDtrsv_ +#define clAmdBlasDzasum clAmdBlasDzasum_ +#define clAmdBlasDznrm2 clAmdBlasDznrm2_ +#define clAmdBlasGetVersion clAmdBlasGetVersion_ +#define clAmdBlasRemoveScratchImage clAmdBlasRemoveScratchImage_ +#define clAmdBlasSasum clAmdBlasSasum_ +#define clAmdBlasSaxpy clAmdBlasSaxpy_ +#define clAmdBlasScasum clAmdBlasScasum_ +#define clAmdBlasScnrm2 clAmdBlasScnrm2_ +#define clAmdBlasScopy clAmdBlasScopy_ +#define clAmdBlasSdot clAmdBlasSdot_ +#define clAmdBlasSetup clAmdBlasSetup_ +#define clAmdBlasSgbmv clAmdBlasSgbmv_ +#define clAmdBlasSgemm clAmdBlasSgemm_ +#define clAmdBlasSgemmEx clAmdBlasSgemmEx_ +#define clAmdBlasSgemv clAmdBlasSgemv_ +#define clAmdBlasSgemvEx clAmdBlasSgemvEx_ +#define clAmdBlasSger clAmdBlasSger_ +#define clAmdBlasSnrm2 clAmdBlasSnrm2_ +#define clAmdBlasSrot clAmdBlasSrot_ +#define clAmdBlasSrotg clAmdBlasSrotg_ +#define clAmdBlasSrotm clAmdBlasSrotm_ +#define clAmdBlasSrotmg clAmdBlasSrotmg_ +#define clAmdBlasSsbmv clAmdBlasSsbmv_ +#define clAmdBlasSscal clAmdBlasSscal_ +#define clAmdBlasSspmv clAmdBlasSspmv_ +#define clAmdBlasSspr clAmdBlasSspr_ +#define clAmdBlasSspr2 clAmdBlasSspr2_ +#define clAmdBlasSswap clAmdBlasSswap_ +#define clAmdBlasSsymm clAmdBlasSsymm_ +#define clAmdBlasSsymv clAmdBlasSsymv_ +#define clAmdBlasSsymvEx clAmdBlasSsymvEx_ +#define clAmdBlasSsyr clAmdBlasSsyr_ +#define clAmdBlasSsyr2 clAmdBlasSsyr2_ +#define clAmdBlasSsyr2k clAmdBlasSsyr2k_ +#define clAmdBlasSsyr2kEx clAmdBlasSsyr2kEx_ +#define clAmdBlasSsyrk clAmdBlasSsyrk_ +#define clAmdBlasSsyrkEx clAmdBlasSsyrkEx_ +#define clAmdBlasStbmv clAmdBlasStbmv_ +#define clAmdBlasStbsv clAmdBlasStbsv_ +#define clAmdBlasStpmv clAmdBlasStpmv_ +#define clAmdBlasStpsv clAmdBlasStpsv_ +#define clAmdBlasStrmm clAmdBlasStrmm_ +#define clAmdBlasStrmmEx clAmdBlasStrmmEx_ +#define clAmdBlasStrmv clAmdBlasStrmv_ +#define clAmdBlasStrsm clAmdBlasStrsm_ +#define clAmdBlasStrsmEx clAmdBlasStrsmEx_ +#define clAmdBlasStrsv clAmdBlasStrsv_ +#define clAmdBlasTeardown clAmdBlasTeardown_ +#define clAmdBlasZaxpy clAmdBlasZaxpy_ +#define clAmdBlasZcopy clAmdBlasZcopy_ +#define clAmdBlasZdotc clAmdBlasZdotc_ +#define clAmdBlasZdotu clAmdBlasZdotu_ +#define clAmdBlasZdrot clAmdBlasZdrot_ +#define clAmdBlasZdscal clAmdBlasZdscal_ +#define clAmdBlasZgbmv clAmdBlasZgbmv_ +#define clAmdBlasZgemm clAmdBlasZgemm_ +#define clAmdBlasZgemmEx clAmdBlasZgemmEx_ +#define clAmdBlasZgemv clAmdBlasZgemv_ +#define clAmdBlasZgemvEx clAmdBlasZgemvEx_ +#define clAmdBlasZgerc clAmdBlasZgerc_ +#define clAmdBlasZgeru clAmdBlasZgeru_ +#define clAmdBlasZhbmv clAmdBlasZhbmv_ +#define clAmdBlasZhemm clAmdBlasZhemm_ +#define clAmdBlasZhemv clAmdBlasZhemv_ +#define clAmdBlasZher clAmdBlasZher_ +#define clAmdBlasZher2 clAmdBlasZher2_ +#define clAmdBlasZher2k clAmdBlasZher2k_ +#define clAmdBlasZherk clAmdBlasZherk_ +#define clAmdBlasZhpmv clAmdBlasZhpmv_ +#define clAmdBlasZhpr clAmdBlasZhpr_ +#define clAmdBlasZhpr2 clAmdBlasZhpr2_ +#define clAmdBlasZrotg clAmdBlasZrotg_ +#define clAmdBlasZscal clAmdBlasZscal_ +#define clAmdBlasZswap clAmdBlasZswap_ +#define clAmdBlasZsymm clAmdBlasZsymm_ +#define clAmdBlasZsyr2k clAmdBlasZsyr2k_ +#define clAmdBlasZsyr2kEx clAmdBlasZsyr2kEx_ +#define clAmdBlasZsyrk clAmdBlasZsyrk_ +#define clAmdBlasZsyrkEx clAmdBlasZsyrkEx_ +#define clAmdBlasZtbmv clAmdBlasZtbmv_ +#define clAmdBlasZtbsv clAmdBlasZtbsv_ +#define clAmdBlasZtpmv clAmdBlasZtpmv_ +#define clAmdBlasZtpsv clAmdBlasZtpsv_ +#define clAmdBlasZtrmm clAmdBlasZtrmm_ +#define clAmdBlasZtrmmEx clAmdBlasZtrmmEx_ +#define clAmdBlasZtrmv clAmdBlasZtrmv_ +#define clAmdBlasZtrsm clAmdBlasZtrsm_ +#define clAmdBlasZtrsmEx clAmdBlasZtrsmEx_ +#define clAmdBlasZtrsv clAmdBlasZtrsv_ +#define clAmdBlasiCamax clAmdBlasiCamax_ +#define clAmdBlasiDamax clAmdBlasiDamax_ +#define clAmdBlasiSamax clAmdBlasiSamax_ +#define clAmdBlasiZamax clAmdBlasiZamax_ + +#include + +// generated by parser_clamdblas.py +#undef clAmdBlasAddScratchImage +//#define clAmdBlasAddScratchImage clAmdBlasAddScratchImage_pfn +#undef clAmdBlasCaxpy +//#define clAmdBlasCaxpy clAmdBlasCaxpy_pfn +#undef clAmdBlasCcopy +//#define clAmdBlasCcopy clAmdBlasCcopy_pfn +#undef clAmdBlasCdotc +//#define clAmdBlasCdotc clAmdBlasCdotc_pfn +#undef clAmdBlasCdotu +//#define clAmdBlasCdotu clAmdBlasCdotu_pfn +#undef clAmdBlasCgbmv +//#define clAmdBlasCgbmv clAmdBlasCgbmv_pfn +#undef clAmdBlasCgemm +//#define clAmdBlasCgemm clAmdBlasCgemm_pfn +#undef clAmdBlasCgemmEx +#define clAmdBlasCgemmEx clAmdBlasCgemmEx_pfn +#undef clAmdBlasCgemv +//#define clAmdBlasCgemv clAmdBlasCgemv_pfn +#undef clAmdBlasCgemvEx +//#define clAmdBlasCgemvEx clAmdBlasCgemvEx_pfn +#undef clAmdBlasCgerc +//#define clAmdBlasCgerc clAmdBlasCgerc_pfn +#undef clAmdBlasCgeru +//#define clAmdBlasCgeru clAmdBlasCgeru_pfn +#undef clAmdBlasChbmv +//#define clAmdBlasChbmv clAmdBlasChbmv_pfn +#undef clAmdBlasChemm +//#define clAmdBlasChemm clAmdBlasChemm_pfn +#undef clAmdBlasChemv +//#define clAmdBlasChemv clAmdBlasChemv_pfn +#undef clAmdBlasCher +//#define clAmdBlasCher clAmdBlasCher_pfn +#undef clAmdBlasCher2 +//#define clAmdBlasCher2 clAmdBlasCher2_pfn +#undef clAmdBlasCher2k +//#define clAmdBlasCher2k clAmdBlasCher2k_pfn +#undef clAmdBlasCherk +//#define clAmdBlasCherk clAmdBlasCherk_pfn +#undef clAmdBlasChpmv +//#define clAmdBlasChpmv clAmdBlasChpmv_pfn +#undef clAmdBlasChpr +//#define clAmdBlasChpr clAmdBlasChpr_pfn +#undef clAmdBlasChpr2 +//#define clAmdBlasChpr2 clAmdBlasChpr2_pfn +#undef clAmdBlasCrotg +//#define clAmdBlasCrotg clAmdBlasCrotg_pfn +#undef clAmdBlasCscal +//#define clAmdBlasCscal clAmdBlasCscal_pfn +#undef clAmdBlasCsrot +//#define clAmdBlasCsrot clAmdBlasCsrot_pfn +#undef clAmdBlasCsscal +//#define clAmdBlasCsscal clAmdBlasCsscal_pfn +#undef clAmdBlasCswap +//#define clAmdBlasCswap clAmdBlasCswap_pfn +#undef clAmdBlasCsymm +//#define clAmdBlasCsymm clAmdBlasCsymm_pfn +#undef clAmdBlasCsyr2k +//#define clAmdBlasCsyr2k clAmdBlasCsyr2k_pfn +#undef clAmdBlasCsyr2kEx +//#define clAmdBlasCsyr2kEx clAmdBlasCsyr2kEx_pfn +#undef clAmdBlasCsyrk +//#define clAmdBlasCsyrk clAmdBlasCsyrk_pfn +#undef clAmdBlasCsyrkEx +//#define clAmdBlasCsyrkEx clAmdBlasCsyrkEx_pfn +#undef clAmdBlasCtbmv +//#define clAmdBlasCtbmv clAmdBlasCtbmv_pfn +#undef clAmdBlasCtbsv +//#define clAmdBlasCtbsv clAmdBlasCtbsv_pfn +#undef clAmdBlasCtpmv +//#define clAmdBlasCtpmv clAmdBlasCtpmv_pfn +#undef clAmdBlasCtpsv +//#define clAmdBlasCtpsv clAmdBlasCtpsv_pfn +#undef clAmdBlasCtrmm +//#define clAmdBlasCtrmm clAmdBlasCtrmm_pfn +#undef clAmdBlasCtrmmEx +//#define clAmdBlasCtrmmEx clAmdBlasCtrmmEx_pfn +#undef clAmdBlasCtrmv +//#define clAmdBlasCtrmv clAmdBlasCtrmv_pfn +#undef clAmdBlasCtrsm +//#define clAmdBlasCtrsm clAmdBlasCtrsm_pfn +#undef clAmdBlasCtrsmEx +//#define clAmdBlasCtrsmEx clAmdBlasCtrsmEx_pfn +#undef clAmdBlasCtrsv +//#define clAmdBlasCtrsv clAmdBlasCtrsv_pfn +#undef clAmdBlasDasum +//#define clAmdBlasDasum clAmdBlasDasum_pfn +#undef clAmdBlasDaxpy +//#define clAmdBlasDaxpy clAmdBlasDaxpy_pfn +#undef clAmdBlasDcopy +//#define clAmdBlasDcopy clAmdBlasDcopy_pfn +#undef clAmdBlasDdot +//#define clAmdBlasDdot clAmdBlasDdot_pfn +#undef clAmdBlasDgbmv +//#define clAmdBlasDgbmv clAmdBlasDgbmv_pfn +#undef clAmdBlasDgemm +//#define clAmdBlasDgemm clAmdBlasDgemm_pfn +#undef clAmdBlasDgemmEx +#define clAmdBlasDgemmEx clAmdBlasDgemmEx_pfn +#undef clAmdBlasDgemv +//#define clAmdBlasDgemv clAmdBlasDgemv_pfn +#undef clAmdBlasDgemvEx +//#define clAmdBlasDgemvEx clAmdBlasDgemvEx_pfn +#undef clAmdBlasDger +//#define clAmdBlasDger clAmdBlasDger_pfn +#undef clAmdBlasDnrm2 +//#define clAmdBlasDnrm2 clAmdBlasDnrm2_pfn +#undef clAmdBlasDrot +//#define clAmdBlasDrot clAmdBlasDrot_pfn +#undef clAmdBlasDrotg +//#define clAmdBlasDrotg clAmdBlasDrotg_pfn +#undef clAmdBlasDrotm +//#define clAmdBlasDrotm clAmdBlasDrotm_pfn +#undef clAmdBlasDrotmg +//#define clAmdBlasDrotmg clAmdBlasDrotmg_pfn +#undef clAmdBlasDsbmv +//#define clAmdBlasDsbmv clAmdBlasDsbmv_pfn +#undef clAmdBlasDscal +//#define clAmdBlasDscal clAmdBlasDscal_pfn +#undef clAmdBlasDspmv +//#define clAmdBlasDspmv clAmdBlasDspmv_pfn +#undef clAmdBlasDspr +//#define clAmdBlasDspr clAmdBlasDspr_pfn +#undef clAmdBlasDspr2 +//#define clAmdBlasDspr2 clAmdBlasDspr2_pfn +#undef clAmdBlasDswap +//#define clAmdBlasDswap clAmdBlasDswap_pfn +#undef clAmdBlasDsymm +//#define clAmdBlasDsymm clAmdBlasDsymm_pfn +#undef clAmdBlasDsymv +//#define clAmdBlasDsymv clAmdBlasDsymv_pfn +#undef clAmdBlasDsymvEx +//#define clAmdBlasDsymvEx clAmdBlasDsymvEx_pfn +#undef clAmdBlasDsyr +//#define clAmdBlasDsyr clAmdBlasDsyr_pfn +#undef clAmdBlasDsyr2 +//#define clAmdBlasDsyr2 clAmdBlasDsyr2_pfn +#undef clAmdBlasDsyr2k +//#define clAmdBlasDsyr2k clAmdBlasDsyr2k_pfn +#undef clAmdBlasDsyr2kEx +//#define clAmdBlasDsyr2kEx clAmdBlasDsyr2kEx_pfn +#undef clAmdBlasDsyrk +//#define clAmdBlasDsyrk clAmdBlasDsyrk_pfn +#undef clAmdBlasDsyrkEx +//#define clAmdBlasDsyrkEx clAmdBlasDsyrkEx_pfn +#undef clAmdBlasDtbmv +//#define clAmdBlasDtbmv clAmdBlasDtbmv_pfn +#undef clAmdBlasDtbsv +//#define clAmdBlasDtbsv clAmdBlasDtbsv_pfn +#undef clAmdBlasDtpmv +//#define clAmdBlasDtpmv clAmdBlasDtpmv_pfn +#undef clAmdBlasDtpsv +//#define clAmdBlasDtpsv clAmdBlasDtpsv_pfn +#undef clAmdBlasDtrmm +//#define clAmdBlasDtrmm clAmdBlasDtrmm_pfn +#undef clAmdBlasDtrmmEx +//#define clAmdBlasDtrmmEx clAmdBlasDtrmmEx_pfn +#undef clAmdBlasDtrmv +//#define clAmdBlasDtrmv clAmdBlasDtrmv_pfn +#undef clAmdBlasDtrsm +//#define clAmdBlasDtrsm clAmdBlasDtrsm_pfn +#undef clAmdBlasDtrsmEx +//#define clAmdBlasDtrsmEx clAmdBlasDtrsmEx_pfn +#undef clAmdBlasDtrsv +//#define clAmdBlasDtrsv clAmdBlasDtrsv_pfn +#undef clAmdBlasDzasum +//#define clAmdBlasDzasum clAmdBlasDzasum_pfn +#undef clAmdBlasDznrm2 +//#define clAmdBlasDznrm2 clAmdBlasDznrm2_pfn +#undef clAmdBlasGetVersion +//#define clAmdBlasGetVersion clAmdBlasGetVersion_pfn +#undef clAmdBlasRemoveScratchImage +//#define clAmdBlasRemoveScratchImage clAmdBlasRemoveScratchImage_pfn +#undef clAmdBlasSasum +//#define clAmdBlasSasum clAmdBlasSasum_pfn +#undef clAmdBlasSaxpy +//#define clAmdBlasSaxpy clAmdBlasSaxpy_pfn +#undef clAmdBlasScasum +//#define clAmdBlasScasum clAmdBlasScasum_pfn +#undef clAmdBlasScnrm2 +//#define clAmdBlasScnrm2 clAmdBlasScnrm2_pfn +#undef clAmdBlasScopy +//#define clAmdBlasScopy clAmdBlasScopy_pfn +#undef clAmdBlasSdot +//#define clAmdBlasSdot clAmdBlasSdot_pfn +#undef clAmdBlasSetup +#define clAmdBlasSetup clAmdBlasSetup_pfn +#undef clAmdBlasSgbmv +//#define clAmdBlasSgbmv clAmdBlasSgbmv_pfn +#undef clAmdBlasSgemm +//#define clAmdBlasSgemm clAmdBlasSgemm_pfn +#undef clAmdBlasSgemmEx +#define clAmdBlasSgemmEx clAmdBlasSgemmEx_pfn +#undef clAmdBlasSgemv +//#define clAmdBlasSgemv clAmdBlasSgemv_pfn +#undef clAmdBlasSgemvEx +//#define clAmdBlasSgemvEx clAmdBlasSgemvEx_pfn +#undef clAmdBlasSger +//#define clAmdBlasSger clAmdBlasSger_pfn +#undef clAmdBlasSnrm2 +//#define clAmdBlasSnrm2 clAmdBlasSnrm2_pfn +#undef clAmdBlasSrot +//#define clAmdBlasSrot clAmdBlasSrot_pfn +#undef clAmdBlasSrotg +//#define clAmdBlasSrotg clAmdBlasSrotg_pfn +#undef clAmdBlasSrotm +//#define clAmdBlasSrotm clAmdBlasSrotm_pfn +#undef clAmdBlasSrotmg +//#define clAmdBlasSrotmg clAmdBlasSrotmg_pfn +#undef clAmdBlasSsbmv +//#define clAmdBlasSsbmv clAmdBlasSsbmv_pfn +#undef clAmdBlasSscal +//#define clAmdBlasSscal clAmdBlasSscal_pfn +#undef clAmdBlasSspmv +//#define clAmdBlasSspmv clAmdBlasSspmv_pfn +#undef clAmdBlasSspr +//#define clAmdBlasSspr clAmdBlasSspr_pfn +#undef clAmdBlasSspr2 +//#define clAmdBlasSspr2 clAmdBlasSspr2_pfn +#undef clAmdBlasSswap +//#define clAmdBlasSswap clAmdBlasSswap_pfn +#undef clAmdBlasSsymm +//#define clAmdBlasSsymm clAmdBlasSsymm_pfn +#undef clAmdBlasSsymv +//#define clAmdBlasSsymv clAmdBlasSsymv_pfn +#undef clAmdBlasSsymvEx +//#define clAmdBlasSsymvEx clAmdBlasSsymvEx_pfn +#undef clAmdBlasSsyr +//#define clAmdBlasSsyr clAmdBlasSsyr_pfn +#undef clAmdBlasSsyr2 +//#define clAmdBlasSsyr2 clAmdBlasSsyr2_pfn +#undef clAmdBlasSsyr2k +//#define clAmdBlasSsyr2k clAmdBlasSsyr2k_pfn +#undef clAmdBlasSsyr2kEx +//#define clAmdBlasSsyr2kEx clAmdBlasSsyr2kEx_pfn +#undef clAmdBlasSsyrk +//#define clAmdBlasSsyrk clAmdBlasSsyrk_pfn +#undef clAmdBlasSsyrkEx +//#define clAmdBlasSsyrkEx clAmdBlasSsyrkEx_pfn +#undef clAmdBlasStbmv +//#define clAmdBlasStbmv clAmdBlasStbmv_pfn +#undef clAmdBlasStbsv +//#define clAmdBlasStbsv clAmdBlasStbsv_pfn +#undef clAmdBlasStpmv +//#define clAmdBlasStpmv clAmdBlasStpmv_pfn +#undef clAmdBlasStpsv +//#define clAmdBlasStpsv clAmdBlasStpsv_pfn +#undef clAmdBlasStrmm +//#define clAmdBlasStrmm clAmdBlasStrmm_pfn +#undef clAmdBlasStrmmEx +//#define clAmdBlasStrmmEx clAmdBlasStrmmEx_pfn +#undef clAmdBlasStrmv +//#define clAmdBlasStrmv clAmdBlasStrmv_pfn +#undef clAmdBlasStrsm +//#define clAmdBlasStrsm clAmdBlasStrsm_pfn +#undef clAmdBlasStrsmEx +//#define clAmdBlasStrsmEx clAmdBlasStrsmEx_pfn +#undef clAmdBlasStrsv +//#define clAmdBlasStrsv clAmdBlasStrsv_pfn +#undef clAmdBlasTeardown +#define clAmdBlasTeardown clAmdBlasTeardown_pfn +#undef clAmdBlasZaxpy +//#define clAmdBlasZaxpy clAmdBlasZaxpy_pfn +#undef clAmdBlasZcopy +//#define clAmdBlasZcopy clAmdBlasZcopy_pfn +#undef clAmdBlasZdotc +//#define clAmdBlasZdotc clAmdBlasZdotc_pfn +#undef clAmdBlasZdotu +//#define clAmdBlasZdotu clAmdBlasZdotu_pfn +#undef clAmdBlasZdrot +//#define clAmdBlasZdrot clAmdBlasZdrot_pfn +#undef clAmdBlasZdscal +//#define clAmdBlasZdscal clAmdBlasZdscal_pfn +#undef clAmdBlasZgbmv +//#define clAmdBlasZgbmv clAmdBlasZgbmv_pfn +#undef clAmdBlasZgemm +//#define clAmdBlasZgemm clAmdBlasZgemm_pfn +#undef clAmdBlasZgemmEx +#define clAmdBlasZgemmEx clAmdBlasZgemmEx_pfn +#undef clAmdBlasZgemv +//#define clAmdBlasZgemv clAmdBlasZgemv_pfn +#undef clAmdBlasZgemvEx +//#define clAmdBlasZgemvEx clAmdBlasZgemvEx_pfn +#undef clAmdBlasZgerc +//#define clAmdBlasZgerc clAmdBlasZgerc_pfn +#undef clAmdBlasZgeru +//#define clAmdBlasZgeru clAmdBlasZgeru_pfn +#undef clAmdBlasZhbmv +//#define clAmdBlasZhbmv clAmdBlasZhbmv_pfn +#undef clAmdBlasZhemm +//#define clAmdBlasZhemm clAmdBlasZhemm_pfn +#undef clAmdBlasZhemv +//#define clAmdBlasZhemv clAmdBlasZhemv_pfn +#undef clAmdBlasZher +//#define clAmdBlasZher clAmdBlasZher_pfn +#undef clAmdBlasZher2 +//#define clAmdBlasZher2 clAmdBlasZher2_pfn +#undef clAmdBlasZher2k +//#define clAmdBlasZher2k clAmdBlasZher2k_pfn +#undef clAmdBlasZherk +//#define clAmdBlasZherk clAmdBlasZherk_pfn +#undef clAmdBlasZhpmv +//#define clAmdBlasZhpmv clAmdBlasZhpmv_pfn +#undef clAmdBlasZhpr +//#define clAmdBlasZhpr clAmdBlasZhpr_pfn +#undef clAmdBlasZhpr2 +//#define clAmdBlasZhpr2 clAmdBlasZhpr2_pfn +#undef clAmdBlasZrotg +//#define clAmdBlasZrotg clAmdBlasZrotg_pfn +#undef clAmdBlasZscal +//#define clAmdBlasZscal clAmdBlasZscal_pfn +#undef clAmdBlasZswap +//#define clAmdBlasZswap clAmdBlasZswap_pfn +#undef clAmdBlasZsymm +//#define clAmdBlasZsymm clAmdBlasZsymm_pfn +#undef clAmdBlasZsyr2k +//#define clAmdBlasZsyr2k clAmdBlasZsyr2k_pfn +#undef clAmdBlasZsyr2kEx +//#define clAmdBlasZsyr2kEx clAmdBlasZsyr2kEx_pfn +#undef clAmdBlasZsyrk +//#define clAmdBlasZsyrk clAmdBlasZsyrk_pfn +#undef clAmdBlasZsyrkEx +//#define clAmdBlasZsyrkEx clAmdBlasZsyrkEx_pfn +#undef clAmdBlasZtbmv +//#define clAmdBlasZtbmv clAmdBlasZtbmv_pfn +#undef clAmdBlasZtbsv +//#define clAmdBlasZtbsv clAmdBlasZtbsv_pfn +#undef clAmdBlasZtpmv +//#define clAmdBlasZtpmv clAmdBlasZtpmv_pfn +#undef clAmdBlasZtpsv +//#define clAmdBlasZtpsv clAmdBlasZtpsv_pfn +#undef clAmdBlasZtrmm +//#define clAmdBlasZtrmm clAmdBlasZtrmm_pfn +#undef clAmdBlasZtrmmEx +//#define clAmdBlasZtrmmEx clAmdBlasZtrmmEx_pfn +#undef clAmdBlasZtrmv +//#define clAmdBlasZtrmv clAmdBlasZtrmv_pfn +#undef clAmdBlasZtrsm +//#define clAmdBlasZtrsm clAmdBlasZtrsm_pfn +#undef clAmdBlasZtrsmEx +//#define clAmdBlasZtrsmEx clAmdBlasZtrsmEx_pfn +#undef clAmdBlasZtrsv +//#define clAmdBlasZtrsv clAmdBlasZtrsv_pfn +#undef clAmdBlasiCamax +//#define clAmdBlasiCamax clAmdBlasiCamax_pfn +#undef clAmdBlasiDamax +//#define clAmdBlasiDamax clAmdBlasiDamax_pfn +#undef clAmdBlasiSamax +//#define clAmdBlasiSamax clAmdBlasiSamax_pfn +#undef clAmdBlasiZamax +//#define clAmdBlasiZamax clAmdBlasiZamax_pfn + +// generated by parser_clamdblas.py +//extern CL_RUNTIME_EXPORT cl_ulong (*clAmdBlasAddScratchImage)(cl_context context, size_t width, size_t height, clAmdBlasStatus* status); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCaxpy)(size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCcopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCdotc)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCdotu)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgbmv)(clAmdBlasOrder order, clAmdBlasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgemm)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, FloatComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgemmEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, FloatComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgemv)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, FloatComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgemvEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, FloatComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgerc)(clAmdBlasOrder order, size_t M, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgeru)(clAmdBlasOrder order, size_t M, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, size_t K, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChemm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChemv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, FloatComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, FloatComplex beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCher)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCher2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCher2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCherk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, float alpha, const cl_mem A, size_t offa, size_t lda, float beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float2 alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_float2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChpr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChpr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCrotg)(cl_mem CA, size_t offCA, cl_mem CB, size_t offCB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCscal)(size_t N, cl_float2 alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_float C, cl_float S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsscal)(size_t N, cl_float alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsymm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsyr2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, FloatComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsyr2kEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, FloatComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsyrk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t lda, FloatComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsyrkEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, FloatComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtbsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtpsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrmm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrmmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrsm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrsmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDaxpy)(size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDcopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDdot)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgbmv)(clAmdBlasOrder order, clAmdBlasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_double alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgemm)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, cl_double beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgemmEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_double beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgemv)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgemvEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDger)(clAmdBlasOrder order, size_t M, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDnrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_double C, cl_double S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDrotg)(cl_mem DA, size_t offDA, cl_mem DB, size_t offDB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDrotm)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, const cl_mem DPARAM, size_t offDparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDrotmg)(cl_mem DD1, size_t offDD1, cl_mem DD2, size_t offDD2, cl_mem DX1, size_t offDX1, const cl_mem DY1, size_t offDY1, cl_mem DPARAM, size_t offDparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDscal)(size_t N, cl_double alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDspmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_double beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDspr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDspr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsymm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsymv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsymvEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyr2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, cl_double beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyr2kEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_double beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyrk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t lda, cl_double beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyrkEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offA, size_t lda, cl_double beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtbsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtpsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrmm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrmmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrsm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrsmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDzasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDznrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasGetVersion)(cl_uint* major, cl_uint* minor, cl_uint* patch); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasRemoveScratchImage)(cl_ulong imageID); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSaxpy)(size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasScasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasScnrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasScopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSdot)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSetup)(); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgbmv)(clAmdBlasOrder order, clAmdBlasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_float alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgemm)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, cl_float beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgemmEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_float beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgemv)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgemvEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSger)(clAmdBlasOrder order, size_t M, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSnrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_float C, cl_float S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSrotg)(cl_mem SA, size_t offSA, cl_mem SB, size_t offSB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSrotm)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, const cl_mem SPARAM, size_t offSparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSrotmg)(cl_mem SD1, size_t offSD1, cl_mem SD2, size_t offSD2, cl_mem SX1, size_t offSX1, const cl_mem SY1, size_t offSY1, cl_mem SPARAM, size_t offSparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSscal)(size_t N, cl_float alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSspmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_float beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSspr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSspr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsymm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsymv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsymvEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyr2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, cl_float beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyr2kEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_float beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyrk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t lda, cl_float beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyrkEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offA, size_t lda, cl_float beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStbsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStpsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrmm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrmmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrsm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrsmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT void (*clAmdBlasTeardown)(); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZaxpy)(size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZcopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZdotc)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZdotu)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZdrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_double C, cl_double S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZdscal)(size_t N, cl_double alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgbmv)(clAmdBlasOrder order, clAmdBlasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgemm)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, DoubleComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgemmEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, DoubleComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgemv)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, DoubleComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgemvEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, DoubleComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgerc)(clAmdBlasOrder order, size_t M, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgeru)(clAmdBlasOrder order, size_t M, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, size_t K, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhemm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhemv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, DoubleComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, DoubleComplex beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZher)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZher2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZher2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZherk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, double alpha, const cl_mem A, size_t offa, size_t lda, double beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double2 alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_double2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhpr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhpr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZrotg)(cl_mem CA, size_t offCA, cl_mem CB, size_t offCB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZscal)(size_t N, cl_double2 alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsymm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsyr2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, DoubleComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsyr2kEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, DoubleComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsyrk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t lda, DoubleComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsyrkEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, DoubleComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtbsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtpsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrmm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrmmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrsm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrsmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasiCamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasiDamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasiSamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasiZamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); diff --git a/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_clamdfft.hpp b/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_clamdfft.hpp new file mode 100644 index 0000000..1457d7e --- /dev/null +++ b/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_clamdfft.hpp @@ -0,0 +1,142 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#ifndef OPENCV_CORE_OCL_RUNTIME_CLAMDFFT_HPP +#error "Invalid usage" +#endif + +// generated by parser_clamdfft.py +#define clAmdFftBakePlan clAmdFftBakePlan_ +#define clAmdFftCopyPlan clAmdFftCopyPlan_ +#define clAmdFftCreateDefaultPlan clAmdFftCreateDefaultPlan_ +#define clAmdFftDestroyPlan clAmdFftDestroyPlan_ +#define clAmdFftEnqueueTransform clAmdFftEnqueueTransform_ +#define clAmdFftGetLayout clAmdFftGetLayout_ +#define clAmdFftGetPlanBatchSize clAmdFftGetPlanBatchSize_ +#define clAmdFftGetPlanContext clAmdFftGetPlanContext_ +#define clAmdFftGetPlanDim clAmdFftGetPlanDim_ +#define clAmdFftGetPlanDistance clAmdFftGetPlanDistance_ +#define clAmdFftGetPlanInStride clAmdFftGetPlanInStride_ +#define clAmdFftGetPlanLength clAmdFftGetPlanLength_ +#define clAmdFftGetPlanOutStride clAmdFftGetPlanOutStride_ +#define clAmdFftGetPlanPrecision clAmdFftGetPlanPrecision_ +#define clAmdFftGetPlanScale clAmdFftGetPlanScale_ +#define clAmdFftGetPlanTransposeResult clAmdFftGetPlanTransposeResult_ +#define clAmdFftGetResultLocation clAmdFftGetResultLocation_ +#define clAmdFftGetTmpBufSize clAmdFftGetTmpBufSize_ +#define clAmdFftGetVersion clAmdFftGetVersion_ +#define clAmdFftSetLayout clAmdFftSetLayout_ +#define clAmdFftSetPlanBatchSize clAmdFftSetPlanBatchSize_ +#define clAmdFftSetPlanDim clAmdFftSetPlanDim_ +#define clAmdFftSetPlanDistance clAmdFftSetPlanDistance_ +#define clAmdFftSetPlanInStride clAmdFftSetPlanInStride_ +#define clAmdFftSetPlanLength clAmdFftSetPlanLength_ +#define clAmdFftSetPlanOutStride clAmdFftSetPlanOutStride_ +#define clAmdFftSetPlanPrecision clAmdFftSetPlanPrecision_ +#define clAmdFftSetPlanScale clAmdFftSetPlanScale_ +#define clAmdFftSetPlanTransposeResult clAmdFftSetPlanTransposeResult_ +#define clAmdFftSetResultLocation clAmdFftSetResultLocation_ +#define clAmdFftSetup clAmdFftSetup_ +#define clAmdFftTeardown clAmdFftTeardown_ + +#include + +// generated by parser_clamdfft.py +#undef clAmdFftBakePlan +#define clAmdFftBakePlan clAmdFftBakePlan_pfn +#undef clAmdFftCopyPlan +//#define clAmdFftCopyPlan clAmdFftCopyPlan_pfn +#undef clAmdFftCreateDefaultPlan +#define clAmdFftCreateDefaultPlan clAmdFftCreateDefaultPlan_pfn +#undef clAmdFftDestroyPlan +#define clAmdFftDestroyPlan clAmdFftDestroyPlan_pfn +#undef clAmdFftEnqueueTransform +#define clAmdFftEnqueueTransform clAmdFftEnqueueTransform_pfn +#undef clAmdFftGetLayout +//#define clAmdFftGetLayout clAmdFftGetLayout_pfn +#undef clAmdFftGetPlanBatchSize +//#define clAmdFftGetPlanBatchSize clAmdFftGetPlanBatchSize_pfn +#undef clAmdFftGetPlanContext +//#define clAmdFftGetPlanContext clAmdFftGetPlanContext_pfn +#undef clAmdFftGetPlanDim +//#define clAmdFftGetPlanDim clAmdFftGetPlanDim_pfn +#undef clAmdFftGetPlanDistance +//#define clAmdFftGetPlanDistance clAmdFftGetPlanDistance_pfn +#undef clAmdFftGetPlanInStride +//#define clAmdFftGetPlanInStride clAmdFftGetPlanInStride_pfn +#undef clAmdFftGetPlanLength +//#define clAmdFftGetPlanLength clAmdFftGetPlanLength_pfn +#undef clAmdFftGetPlanOutStride +//#define clAmdFftGetPlanOutStride clAmdFftGetPlanOutStride_pfn +#undef clAmdFftGetPlanPrecision +//#define clAmdFftGetPlanPrecision clAmdFftGetPlanPrecision_pfn +#undef clAmdFftGetPlanScale +//#define clAmdFftGetPlanScale clAmdFftGetPlanScale_pfn +#undef clAmdFftGetPlanTransposeResult +//#define clAmdFftGetPlanTransposeResult clAmdFftGetPlanTransposeResult_pfn +#undef clAmdFftGetResultLocation +//#define clAmdFftGetResultLocation clAmdFftGetResultLocation_pfn +#undef clAmdFftGetTmpBufSize +#define clAmdFftGetTmpBufSize clAmdFftGetTmpBufSize_pfn +#undef clAmdFftGetVersion +#define clAmdFftGetVersion clAmdFftGetVersion_pfn +#undef clAmdFftSetLayout +#define clAmdFftSetLayout clAmdFftSetLayout_pfn +#undef clAmdFftSetPlanBatchSize +#define clAmdFftSetPlanBatchSize clAmdFftSetPlanBatchSize_pfn +#undef clAmdFftSetPlanDim +//#define clAmdFftSetPlanDim clAmdFftSetPlanDim_pfn +#undef clAmdFftSetPlanDistance +#define clAmdFftSetPlanDistance clAmdFftSetPlanDistance_pfn +#undef clAmdFftSetPlanInStride +#define clAmdFftSetPlanInStride clAmdFftSetPlanInStride_pfn +#undef clAmdFftSetPlanLength +//#define clAmdFftSetPlanLength clAmdFftSetPlanLength_pfn +#undef clAmdFftSetPlanOutStride +#define clAmdFftSetPlanOutStride clAmdFftSetPlanOutStride_pfn +#undef clAmdFftSetPlanPrecision +#define clAmdFftSetPlanPrecision clAmdFftSetPlanPrecision_pfn +#undef clAmdFftSetPlanScale +#define clAmdFftSetPlanScale clAmdFftSetPlanScale_pfn +#undef clAmdFftSetPlanTransposeResult +//#define clAmdFftSetPlanTransposeResult clAmdFftSetPlanTransposeResult_pfn +#undef clAmdFftSetResultLocation +#define clAmdFftSetResultLocation clAmdFftSetResultLocation_pfn +#undef clAmdFftSetup +#define clAmdFftSetup clAmdFftSetup_pfn +#undef clAmdFftTeardown +#define clAmdFftTeardown clAmdFftTeardown_pfn + +// generated by parser_clamdfft.py +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftBakePlan)(clAmdFftPlanHandle plHandle, cl_uint numQueues, cl_command_queue* commQueueFFT, void (CL_CALLBACK* pfn_notify) (clAmdFftPlanHandle plHandle, void* user_data), void* user_data); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftCopyPlan)(clAmdFftPlanHandle* out_plHandle, cl_context new_context, clAmdFftPlanHandle in_plHandle); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftCreateDefaultPlan)(clAmdFftPlanHandle* plHandle, cl_context context, const clAmdFftDim dim, const size_t* clLengths); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftDestroyPlan)(clAmdFftPlanHandle* plHandle); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftEnqueueTransform)(clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_uint numQueuesAndEvents, cl_command_queue* commQueues, cl_uint numWaitEvents, const cl_event* waitEvents, cl_event* outEvents, cl_mem* inputBuffers, cl_mem* outputBuffers, cl_mem tmpBuffer); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetLayout)(const clAmdFftPlanHandle plHandle, clAmdFftLayout* iLayout, clAmdFftLayout* oLayout); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanBatchSize)(const clAmdFftPlanHandle plHandle, size_t* batchSize); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanContext)(const clAmdFftPlanHandle plHandle, cl_context* context); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanDim)(const clAmdFftPlanHandle plHandle, clAmdFftDim* dim, cl_uint* size); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanDistance)(const clAmdFftPlanHandle plHandle, size_t* iDist, size_t* oDist); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanInStride)(const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanLength)(const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clLengths); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanOutStride)(const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanPrecision)(const clAmdFftPlanHandle plHandle, clAmdFftPrecision* precision); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanScale)(const clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float* scale); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanTransposeResult)(const clAmdFftPlanHandle plHandle, clAmdFftResultTransposed* transposed); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetResultLocation)(const clAmdFftPlanHandle plHandle, clAmdFftResultLocation* placeness); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetTmpBufSize)(const clAmdFftPlanHandle plHandle, size_t* buffersize); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetVersion)(cl_uint* major, cl_uint* minor, cl_uint* patch); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetLayout)(clAmdFftPlanHandle plHandle, clAmdFftLayout iLayout, clAmdFftLayout oLayout); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanBatchSize)(clAmdFftPlanHandle plHandle, size_t batchSize); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanDim)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanDistance)(clAmdFftPlanHandle plHandle, size_t iDist, size_t oDist); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanInStride)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanLength)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, const size_t* clLengths); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanOutStride)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanPrecision)(clAmdFftPlanHandle plHandle, clAmdFftPrecision precision); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanScale)(clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float scale); +//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanTransposeResult)(clAmdFftPlanHandle plHandle, clAmdFftResultTransposed transposed); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetResultLocation)(clAmdFftPlanHandle plHandle, clAmdFftResultLocation placeness); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetup)(const clAmdFftSetupData* setupData); +extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftTeardown)(); diff --git a/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_core.hpp b/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_core.hpp new file mode 100644 index 0000000..28618a1 --- /dev/null +++ b/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_core.hpp @@ -0,0 +1,371 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_CORE_HPP +#error "Invalid usage" +#endif + +// generated by parser_cl.py +#define clBuildProgram clBuildProgram_ +#define clCompileProgram clCompileProgram_ +#define clCreateBuffer clCreateBuffer_ +#define clCreateCommandQueue clCreateCommandQueue_ +#define clCreateContext clCreateContext_ +#define clCreateContextFromType clCreateContextFromType_ +#define clCreateImage clCreateImage_ +#define clCreateImage2D clCreateImage2D_ +#define clCreateImage3D clCreateImage3D_ +#define clCreateKernel clCreateKernel_ +#define clCreateKernelsInProgram clCreateKernelsInProgram_ +#define clCreateProgramWithBinary clCreateProgramWithBinary_ +#define clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels_ +#define clCreateProgramWithSource clCreateProgramWithSource_ +#define clCreateSampler clCreateSampler_ +#define clCreateSubBuffer clCreateSubBuffer_ +#define clCreateSubDevices clCreateSubDevices_ +#define clCreateUserEvent clCreateUserEvent_ +#define clEnqueueBarrier clEnqueueBarrier_ +#define clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList_ +#define clEnqueueCopyBuffer clEnqueueCopyBuffer_ +#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_ +#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_ +#define clEnqueueCopyImage clEnqueueCopyImage_ +#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_ +#define clEnqueueFillBuffer clEnqueueFillBuffer_ +#define clEnqueueFillImage clEnqueueFillImage_ +#define clEnqueueMapBuffer clEnqueueMapBuffer_ +#define clEnqueueMapImage clEnqueueMapImage_ +#define clEnqueueMarker clEnqueueMarker_ +#define clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList_ +#define clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects_ +#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_ +#define clEnqueueNativeKernel clEnqueueNativeKernel_ +#define clEnqueueReadBuffer clEnqueueReadBuffer_ +#define clEnqueueReadBufferRect clEnqueueReadBufferRect_ +#define clEnqueueReadImage clEnqueueReadImage_ +#define clEnqueueTask clEnqueueTask_ +#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_ +#define clEnqueueWaitForEvents clEnqueueWaitForEvents_ +#define clEnqueueWriteBuffer clEnqueueWriteBuffer_ +#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_ +#define clEnqueueWriteImage clEnqueueWriteImage_ +#define clFinish clFinish_ +#define clFlush clFlush_ +#define clGetCommandQueueInfo clGetCommandQueueInfo_ +#define clGetContextInfo clGetContextInfo_ +#define clGetDeviceIDs clGetDeviceIDs_ +#define clGetDeviceInfo clGetDeviceInfo_ +#define clGetEventInfo clGetEventInfo_ +#define clGetEventProfilingInfo clGetEventProfilingInfo_ +#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_ +#define clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform_ +#define clGetImageInfo clGetImageInfo_ +#define clGetKernelArgInfo clGetKernelArgInfo_ +#define clGetKernelInfo clGetKernelInfo_ +#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_ +#define clGetMemObjectInfo clGetMemObjectInfo_ +#define clGetPlatformIDs clGetPlatformIDs_ +#define clGetPlatformInfo clGetPlatformInfo_ +#define clGetProgramBuildInfo clGetProgramBuildInfo_ +#define clGetProgramInfo clGetProgramInfo_ +#define clGetSamplerInfo clGetSamplerInfo_ +#define clGetSupportedImageFormats clGetSupportedImageFormats_ +#define clLinkProgram clLinkProgram_ +#define clReleaseCommandQueue clReleaseCommandQueue_ +#define clReleaseContext clReleaseContext_ +#define clReleaseDevice clReleaseDevice_ +#define clReleaseEvent clReleaseEvent_ +#define clReleaseKernel clReleaseKernel_ +#define clReleaseMemObject clReleaseMemObject_ +#define clReleaseProgram clReleaseProgram_ +#define clReleaseSampler clReleaseSampler_ +#define clRetainCommandQueue clRetainCommandQueue_ +#define clRetainContext clRetainContext_ +#define clRetainDevice clRetainDevice_ +#define clRetainEvent clRetainEvent_ +#define clRetainKernel clRetainKernel_ +#define clRetainMemObject clRetainMemObject_ +#define clRetainProgram clRetainProgram_ +#define clRetainSampler clRetainSampler_ +#define clSetEventCallback clSetEventCallback_ +#define clSetKernelArg clSetKernelArg_ +#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_ +#define clSetUserEventStatus clSetUserEventStatus_ +#define clUnloadCompiler clUnloadCompiler_ +#define clUnloadPlatformCompiler clUnloadPlatformCompiler_ +#define clWaitForEvents clWaitForEvents_ + +#if defined __APPLE__ +#define CL_SILENCE_DEPRECATION +#include +#else +#include +#endif + +// generated by parser_cl.py +#undef clBuildProgram +#define clBuildProgram clBuildProgram_pfn +#undef clCompileProgram +#define clCompileProgram clCompileProgram_pfn +#undef clCreateBuffer +#define clCreateBuffer clCreateBuffer_pfn +#undef clCreateCommandQueue +#define clCreateCommandQueue clCreateCommandQueue_pfn +#undef clCreateContext +#define clCreateContext clCreateContext_pfn +#undef clCreateContextFromType +#define clCreateContextFromType clCreateContextFromType_pfn +#undef clCreateImage +#define clCreateImage clCreateImage_pfn +#undef clCreateImage2D +#define clCreateImage2D clCreateImage2D_pfn +#undef clCreateImage3D +#define clCreateImage3D clCreateImage3D_pfn +#undef clCreateKernel +#define clCreateKernel clCreateKernel_pfn +#undef clCreateKernelsInProgram +#define clCreateKernelsInProgram clCreateKernelsInProgram_pfn +#undef clCreateProgramWithBinary +#define clCreateProgramWithBinary clCreateProgramWithBinary_pfn +#undef clCreateProgramWithBuiltInKernels +#define clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels_pfn +#undef clCreateProgramWithSource +#define clCreateProgramWithSource clCreateProgramWithSource_pfn +#undef clCreateSampler +#define clCreateSampler clCreateSampler_pfn +#undef clCreateSubBuffer +#define clCreateSubBuffer clCreateSubBuffer_pfn +#undef clCreateSubDevices +#define clCreateSubDevices clCreateSubDevices_pfn +#undef clCreateUserEvent +#define clCreateUserEvent clCreateUserEvent_pfn +#undef clEnqueueBarrier +#define clEnqueueBarrier clEnqueueBarrier_pfn +#undef clEnqueueBarrierWithWaitList +#define clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList_pfn +#undef clEnqueueCopyBuffer +#define clEnqueueCopyBuffer clEnqueueCopyBuffer_pfn +#undef clEnqueueCopyBufferRect +#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_pfn +#undef clEnqueueCopyBufferToImage +#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_pfn +#undef clEnqueueCopyImage +#define clEnqueueCopyImage clEnqueueCopyImage_pfn +#undef clEnqueueCopyImageToBuffer +#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_pfn +#undef clEnqueueFillBuffer +#define clEnqueueFillBuffer clEnqueueFillBuffer_pfn +#undef clEnqueueFillImage +#define clEnqueueFillImage clEnqueueFillImage_pfn +#undef clEnqueueMapBuffer +#define clEnqueueMapBuffer clEnqueueMapBuffer_pfn +#undef clEnqueueMapImage +#define clEnqueueMapImage clEnqueueMapImage_pfn +#undef clEnqueueMarker +#define clEnqueueMarker clEnqueueMarker_pfn +#undef clEnqueueMarkerWithWaitList +#define clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList_pfn +#undef clEnqueueMigrateMemObjects +#define clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects_pfn +#undef clEnqueueNDRangeKernel +#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_pfn +#undef clEnqueueNativeKernel +#define clEnqueueNativeKernel clEnqueueNativeKernel_pfn +#undef clEnqueueReadBuffer +#define clEnqueueReadBuffer clEnqueueReadBuffer_pfn +#undef clEnqueueReadBufferRect +#define clEnqueueReadBufferRect clEnqueueReadBufferRect_pfn +#undef clEnqueueReadImage +#define clEnqueueReadImage clEnqueueReadImage_pfn +#undef clEnqueueTask +#define clEnqueueTask clEnqueueTask_pfn +#undef clEnqueueUnmapMemObject +#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_pfn +#undef clEnqueueWaitForEvents +#define clEnqueueWaitForEvents clEnqueueWaitForEvents_pfn +#undef clEnqueueWriteBuffer +#define clEnqueueWriteBuffer clEnqueueWriteBuffer_pfn +#undef clEnqueueWriteBufferRect +#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_pfn +#undef clEnqueueWriteImage +#define clEnqueueWriteImage clEnqueueWriteImage_pfn +#undef clFinish +#define clFinish clFinish_pfn +#undef clFlush +#define clFlush clFlush_pfn +#undef clGetCommandQueueInfo +#define clGetCommandQueueInfo clGetCommandQueueInfo_pfn +#undef clGetContextInfo +#define clGetContextInfo clGetContextInfo_pfn +#undef clGetDeviceIDs +#define clGetDeviceIDs clGetDeviceIDs_pfn +#undef clGetDeviceInfo +#define clGetDeviceInfo clGetDeviceInfo_pfn +#undef clGetEventInfo +#define clGetEventInfo clGetEventInfo_pfn +#undef clGetEventProfilingInfo +#define clGetEventProfilingInfo clGetEventProfilingInfo_pfn +#undef clGetExtensionFunctionAddress +#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_pfn +#undef clGetExtensionFunctionAddressForPlatform +#define clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform_pfn +#undef clGetImageInfo +#define clGetImageInfo clGetImageInfo_pfn +#undef clGetKernelArgInfo +#define clGetKernelArgInfo clGetKernelArgInfo_pfn +#undef clGetKernelInfo +#define clGetKernelInfo clGetKernelInfo_pfn +#undef clGetKernelWorkGroupInfo +#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_pfn +#undef clGetMemObjectInfo +#define clGetMemObjectInfo clGetMemObjectInfo_pfn +#undef clGetPlatformIDs +#define clGetPlatformIDs clGetPlatformIDs_pfn +#undef clGetPlatformInfo +#define clGetPlatformInfo clGetPlatformInfo_pfn +#undef clGetProgramBuildInfo +#define clGetProgramBuildInfo clGetProgramBuildInfo_pfn +#undef clGetProgramInfo +#define clGetProgramInfo clGetProgramInfo_pfn +#undef clGetSamplerInfo +#define clGetSamplerInfo clGetSamplerInfo_pfn +#undef clGetSupportedImageFormats +#define clGetSupportedImageFormats clGetSupportedImageFormats_pfn +#undef clLinkProgram +#define clLinkProgram clLinkProgram_pfn +#undef clReleaseCommandQueue +#define clReleaseCommandQueue clReleaseCommandQueue_pfn +#undef clReleaseContext +#define clReleaseContext clReleaseContext_pfn +#undef clReleaseDevice +#define clReleaseDevice clReleaseDevice_pfn +#undef clReleaseEvent +#define clReleaseEvent clReleaseEvent_pfn +#undef clReleaseKernel +#define clReleaseKernel clReleaseKernel_pfn +#undef clReleaseMemObject +#define clReleaseMemObject clReleaseMemObject_pfn +#undef clReleaseProgram +#define clReleaseProgram clReleaseProgram_pfn +#undef clReleaseSampler +#define clReleaseSampler clReleaseSampler_pfn +#undef clRetainCommandQueue +#define clRetainCommandQueue clRetainCommandQueue_pfn +#undef clRetainContext +#define clRetainContext clRetainContext_pfn +#undef clRetainDevice +#define clRetainDevice clRetainDevice_pfn +#undef clRetainEvent +#define clRetainEvent clRetainEvent_pfn +#undef clRetainKernel +#define clRetainKernel clRetainKernel_pfn +#undef clRetainMemObject +#define clRetainMemObject clRetainMemObject_pfn +#undef clRetainProgram +#define clRetainProgram clRetainProgram_pfn +#undef clRetainSampler +#define clRetainSampler clRetainSampler_pfn +#undef clSetEventCallback +#define clSetEventCallback clSetEventCallback_pfn +#undef clSetKernelArg +#define clSetKernelArg clSetKernelArg_pfn +#undef clSetMemObjectDestructorCallback +#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_pfn +#undef clSetUserEventStatus +#define clSetUserEventStatus clSetUserEventStatus_pfn +#undef clUnloadCompiler +#define clUnloadCompiler clUnloadCompiler_pfn +#undef clUnloadPlatformCompiler +#define clUnloadPlatformCompiler clUnloadPlatformCompiler_pfn +#undef clWaitForEvents +#define clWaitForEvents clWaitForEvents_pfn + +// generated by parser_cl.py +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clBuildProgram)(cl_program, cl_uint, const cl_device_id*, const char*, void (CL_CALLBACK*) (cl_program, void*), void*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clCompileProgram)(cl_program, cl_uint, const cl_device_id*, const char*, cl_uint, const cl_program*, const char**, void (CL_CALLBACK*) (cl_program, void*), void*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateBuffer)(cl_context, cl_mem_flags, size_t, void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_command_queue (CL_API_CALL*clCreateCommandQueue)(cl_context, cl_device_id, cl_command_queue_properties, cl_int*); +extern CL_RUNTIME_EXPORT cl_context (CL_API_CALL*clCreateContext)(const cl_context_properties*, cl_uint, const cl_device_id*, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_context (CL_API_CALL*clCreateContextFromType)(const cl_context_properties*, cl_device_type, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateImage)(cl_context, cl_mem_flags, const cl_image_format*, const cl_image_desc*, void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateImage2D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateImage3D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, size_t, size_t, void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_kernel (CL_API_CALL*clCreateKernel)(cl_program, const char*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clCreateKernelsInProgram)(cl_program, cl_uint, cl_kernel*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clCreateProgramWithBinary)(cl_context, cl_uint, const cl_device_id*, const size_t*, const unsigned char**, cl_int*, cl_int*); +extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clCreateProgramWithBuiltInKernels)(cl_context, cl_uint, const cl_device_id*, const char*, cl_int*); +extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clCreateProgramWithSource)(cl_context, cl_uint, const char**, const size_t*, cl_int*); +extern CL_RUNTIME_EXPORT cl_sampler (CL_API_CALL*clCreateSampler)(cl_context, cl_bool, cl_addressing_mode, cl_filter_mode, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateSubBuffer)(cl_mem, cl_mem_flags, cl_buffer_create_type, const void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clCreateSubDevices)(cl_device_id, const cl_device_partition_property*, cl_uint, cl_device_id*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_event (CL_API_CALL*clCreateUserEvent)(cl_context, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueBarrier)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueBarrierWithWaitList)(cl_command_queue, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyBuffer)(cl_command_queue, cl_mem, cl_mem, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyBufferRect)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyBufferToImage)(cl_command_queue, cl_mem, cl_mem, size_t, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyImage)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyImageToBuffer)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, size_t, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueFillBuffer)(cl_command_queue, cl_mem, const void*, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueFillImage)(cl_command_queue, cl_mem, const void*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clEnqueueMapBuffer)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, size_t, size_t, cl_uint, const cl_event*, cl_event*, cl_int*); +extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clEnqueueMapImage)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, const size_t*, const size_t*, size_t*, size_t*, cl_uint, const cl_event*, cl_event*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueMarker)(cl_command_queue, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueMarkerWithWaitList)(cl_command_queue, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueMigrateMemObjects)(cl_command_queue, cl_uint, const cl_mem*, cl_mem_migration_flags, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueNativeKernel)(cl_command_queue, void (CL_CALLBACK*) (void*), void*, size_t, cl_uint, const cl_mem*, const void**, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReadBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReadBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReadImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueTask)(cl_command_queue, cl_kernel, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueUnmapMemObject)(cl_command_queue, cl_mem, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWaitForEvents)(cl_command_queue, cl_uint, const cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWriteBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWriteBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWriteImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clFinish)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clFlush)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetCommandQueueInfo)(cl_command_queue, cl_command_queue_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetContextInfo)(cl_context, cl_context_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetDeviceIDs)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetDeviceInfo)(cl_device_id, cl_device_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetEventInfo)(cl_event, cl_event_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetEventProfilingInfo)(cl_event, cl_profiling_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clGetExtensionFunctionAddress)(const char*); +extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clGetExtensionFunctionAddressForPlatform)(cl_platform_id, const char*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetImageInfo)(cl_mem, cl_image_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetKernelArgInfo)(cl_kernel, cl_uint, cl_kernel_arg_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetKernelInfo)(cl_kernel, cl_kernel_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetKernelWorkGroupInfo)(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetMemObjectInfo)(cl_mem, cl_mem_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetPlatformIDs)(cl_uint, cl_platform_id*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetPlatformInfo)(cl_platform_id, cl_platform_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetProgramBuildInfo)(cl_program, cl_device_id, cl_program_build_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetProgramInfo)(cl_program, cl_program_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetSamplerInfo)(cl_sampler, cl_sampler_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetSupportedImageFormats)(cl_context, cl_mem_flags, cl_mem_object_type, cl_uint, cl_image_format*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clLinkProgram)(cl_context, cl_uint, const cl_device_id*, const char*, cl_uint, const cl_program*, void (CL_CALLBACK*) (cl_program, void*), void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseCommandQueue)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseContext)(cl_context); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseDevice)(cl_device_id); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseEvent)(cl_event); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseKernel)(cl_kernel); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseMemObject)(cl_mem); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseProgram)(cl_program); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseSampler)(cl_sampler); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainCommandQueue)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainContext)(cl_context); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainDevice)(cl_device_id); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainEvent)(cl_event); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainKernel)(cl_kernel); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainMemObject)(cl_mem); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainProgram)(cl_program); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainSampler)(cl_sampler); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetEventCallback)(cl_event, cl_int, void (CL_CALLBACK*) (cl_event, cl_int, void*), void*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetKernelArg)(cl_kernel, cl_uint, size_t, const void*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetMemObjectDestructorCallback)(cl_mem, void (CL_CALLBACK*) (cl_mem, void*), void*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetUserEventStatus)(cl_event, cl_int); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clUnloadCompiler)(); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clUnloadPlatformCompiler)(cl_platform_id); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clWaitForEvents)(cl_uint, const cl_event*); diff --git a/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_core_wrappers.hpp b/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_core_wrappers.hpp new file mode 100644 index 0000000..216b22b --- /dev/null +++ b/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_core_wrappers.hpp @@ -0,0 +1,272 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_WRAPPERS_HPP +#error "Invalid usage" +#endif + +// generated by parser_cl.py +#undef clBuildProgram +#define clBuildProgram clBuildProgram_fn +inline cl_int clBuildProgram(cl_program p0, cl_uint p1, const cl_device_id* p2, const char* p3, void (CL_CALLBACK*p4) (cl_program, void*), void* p5) { return clBuildProgram_pfn(p0, p1, p2, p3, p4, p5); } +#undef clCompileProgram +#define clCompileProgram clCompileProgram_fn +inline cl_int clCompileProgram(cl_program p0, cl_uint p1, const cl_device_id* p2, const char* p3, cl_uint p4, const cl_program* p5, const char** p6, void (CL_CALLBACK*p7) (cl_program, void*), void* p8) { return clCompileProgram_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clCreateBuffer +#define clCreateBuffer clCreateBuffer_fn +inline cl_mem clCreateBuffer(cl_context p0, cl_mem_flags p1, size_t p2, void* p3, cl_int* p4) { return clCreateBuffer_pfn(p0, p1, p2, p3, p4); } +#undef clCreateCommandQueue +#define clCreateCommandQueue clCreateCommandQueue_fn +inline cl_command_queue clCreateCommandQueue(cl_context p0, cl_device_id p1, cl_command_queue_properties p2, cl_int* p3) { return clCreateCommandQueue_pfn(p0, p1, p2, p3); } +#undef clCreateContext +#define clCreateContext clCreateContext_fn +inline cl_context clCreateContext(const cl_context_properties* p0, cl_uint p1, const cl_device_id* p2, void (CL_CALLBACK*p3) (const char*, const void*, size_t, void*), void* p4, cl_int* p5) { return clCreateContext_pfn(p0, p1, p2, p3, p4, p5); } +#undef clCreateContextFromType +#define clCreateContextFromType clCreateContextFromType_fn +inline cl_context clCreateContextFromType(const cl_context_properties* p0, cl_device_type p1, void (CL_CALLBACK*p2) (const char*, const void*, size_t, void*), void* p3, cl_int* p4) { return clCreateContextFromType_pfn(p0, p1, p2, p3, p4); } +#undef clCreateImage +#define clCreateImage clCreateImage_fn +inline cl_mem clCreateImage(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, const cl_image_desc* p3, void* p4, cl_int* p5) { return clCreateImage_pfn(p0, p1, p2, p3, p4, p5); } +#undef clCreateImage2D +#define clCreateImage2D clCreateImage2D_fn +inline cl_mem clCreateImage2D(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, size_t p3, size_t p4, size_t p5, void* p6, cl_int* p7) { return clCreateImage2D_pfn(p0, p1, p2, p3, p4, p5, p6, p7); } +#undef clCreateImage3D +#define clCreateImage3D clCreateImage3D_fn +inline cl_mem clCreateImage3D(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, size_t p3, size_t p4, size_t p5, size_t p6, size_t p7, void* p8, cl_int* p9) { return clCreateImage3D_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); } +#undef clCreateKernel +#define clCreateKernel clCreateKernel_fn +inline cl_kernel clCreateKernel(cl_program p0, const char* p1, cl_int* p2) { return clCreateKernel_pfn(p0, p1, p2); } +#undef clCreateKernelsInProgram +#define clCreateKernelsInProgram clCreateKernelsInProgram_fn +inline cl_int clCreateKernelsInProgram(cl_program p0, cl_uint p1, cl_kernel* p2, cl_uint* p3) { return clCreateKernelsInProgram_pfn(p0, p1, p2, p3); } +#undef clCreateProgramWithBinary +#define clCreateProgramWithBinary clCreateProgramWithBinary_fn +inline cl_program clCreateProgramWithBinary(cl_context p0, cl_uint p1, const cl_device_id* p2, const size_t* p3, const unsigned char** p4, cl_int* p5, cl_int* p6) { return clCreateProgramWithBinary_pfn(p0, p1, p2, p3, p4, p5, p6); } +#undef clCreateProgramWithBuiltInKernels +#define clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels_fn +inline cl_program clCreateProgramWithBuiltInKernels(cl_context p0, cl_uint p1, const cl_device_id* p2, const char* p3, cl_int* p4) { return clCreateProgramWithBuiltInKernels_pfn(p0, p1, p2, p3, p4); } +#undef clCreateProgramWithSource +#define clCreateProgramWithSource clCreateProgramWithSource_fn +inline cl_program clCreateProgramWithSource(cl_context p0, cl_uint p1, const char** p2, const size_t* p3, cl_int* p4) { return clCreateProgramWithSource_pfn(p0, p1, p2, p3, p4); } +#undef clCreateSampler +#define clCreateSampler clCreateSampler_fn +inline cl_sampler clCreateSampler(cl_context p0, cl_bool p1, cl_addressing_mode p2, cl_filter_mode p3, cl_int* p4) { return clCreateSampler_pfn(p0, p1, p2, p3, p4); } +#undef clCreateSubBuffer +#define clCreateSubBuffer clCreateSubBuffer_fn +inline cl_mem clCreateSubBuffer(cl_mem p0, cl_mem_flags p1, cl_buffer_create_type p2, const void* p3, cl_int* p4) { return clCreateSubBuffer_pfn(p0, p1, p2, p3, p4); } +#undef clCreateSubDevices +#define clCreateSubDevices clCreateSubDevices_fn +inline cl_int clCreateSubDevices(cl_device_id p0, const cl_device_partition_property* p1, cl_uint p2, cl_device_id* p3, cl_uint* p4) { return clCreateSubDevices_pfn(p0, p1, p2, p3, p4); } +#undef clCreateUserEvent +#define clCreateUserEvent clCreateUserEvent_fn +inline cl_event clCreateUserEvent(cl_context p0, cl_int* p1) { return clCreateUserEvent_pfn(p0, p1); } +#undef clEnqueueBarrier +#define clEnqueueBarrier clEnqueueBarrier_fn +inline cl_int clEnqueueBarrier(cl_command_queue p0) { return clEnqueueBarrier_pfn(p0); } +#undef clEnqueueBarrierWithWaitList +#define clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList_fn +inline cl_int clEnqueueBarrierWithWaitList(cl_command_queue p0, cl_uint p1, const cl_event* p2, cl_event* p3) { return clEnqueueBarrierWithWaitList_pfn(p0, p1, p2, p3); } +#undef clEnqueueCopyBuffer +#define clEnqueueCopyBuffer clEnqueueCopyBuffer_fn +inline cl_int clEnqueueCopyBuffer(cl_command_queue p0, cl_mem p1, cl_mem p2, size_t p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueCopyBufferRect +#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_fn +inline cl_int clEnqueueCopyBufferRect(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, cl_uint p10, const cl_event* p11, cl_event* p12) { return clEnqueueCopyBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12); } +#undef clEnqueueCopyBufferToImage +#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_fn +inline cl_int clEnqueueCopyBufferToImage(cl_command_queue p0, cl_mem p1, cl_mem p2, size_t p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyBufferToImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueCopyImage +#define clEnqueueCopyImage clEnqueueCopyImage_fn +inline cl_int clEnqueueCopyImage(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueCopyImageToBuffer +#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_fn +inline cl_int clEnqueueCopyImageToBuffer(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyImageToBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueFillBuffer +#define clEnqueueFillBuffer clEnqueueFillBuffer_fn +inline cl_int clEnqueueFillBuffer(cl_command_queue p0, cl_mem p1, const void* p2, size_t p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueFillBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueFillImage +#define clEnqueueFillImage clEnqueueFillImage_fn +inline cl_int clEnqueueFillImage(cl_command_queue p0, cl_mem p1, const void* p2, const size_t* p3, const size_t* p4, cl_uint p5, const cl_event* p6, cl_event* p7) { return clEnqueueFillImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7); } +#undef clEnqueueMapBuffer +#define clEnqueueMapBuffer clEnqueueMapBuffer_fn +inline void* clEnqueueMapBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, cl_map_flags p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8, cl_int* p9) { return clEnqueueMapBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); } +#undef clEnqueueMapImage +#define clEnqueueMapImage clEnqueueMapImage_fn +inline void* clEnqueueMapImage(cl_command_queue p0, cl_mem p1, cl_bool p2, cl_map_flags p3, const size_t* p4, const size_t* p5, size_t* p6, size_t* p7, cl_uint p8, const cl_event* p9, cl_event* p10, cl_int* p11) { return clEnqueueMapImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11); } +#undef clEnqueueMarker +#define clEnqueueMarker clEnqueueMarker_fn +inline cl_int clEnqueueMarker(cl_command_queue p0, cl_event* p1) { return clEnqueueMarker_pfn(p0, p1); } +#undef clEnqueueMarkerWithWaitList +#define clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList_fn +inline cl_int clEnqueueMarkerWithWaitList(cl_command_queue p0, cl_uint p1, const cl_event* p2, cl_event* p3) { return clEnqueueMarkerWithWaitList_pfn(p0, p1, p2, p3); } +#undef clEnqueueMigrateMemObjects +#define clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects_fn +inline cl_int clEnqueueMigrateMemObjects(cl_command_queue p0, cl_uint p1, const cl_mem* p2, cl_mem_migration_flags p3, cl_uint p4, const cl_event* p5, cl_event* p6) { return clEnqueueMigrateMemObjects_pfn(p0, p1, p2, p3, p4, p5, p6); } +#undef clEnqueueNDRangeKernel +#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_fn +inline cl_int clEnqueueNDRangeKernel(cl_command_queue p0, cl_kernel p1, cl_uint p2, const size_t* p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueNDRangeKernel_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueNativeKernel +#define clEnqueueNativeKernel clEnqueueNativeKernel_fn +inline cl_int clEnqueueNativeKernel(cl_command_queue p0, void (CL_CALLBACK*p1) (void*), void* p2, size_t p3, cl_uint p4, const cl_mem* p5, const void** p6, cl_uint p7, const cl_event* p8, cl_event* p9) { return clEnqueueNativeKernel_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); } +#undef clEnqueueReadBuffer +#define clEnqueueReadBuffer clEnqueueReadBuffer_fn +inline cl_int clEnqueueReadBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, size_t p3, size_t p4, void* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueReadBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueReadBufferRect +#define clEnqueueReadBufferRect clEnqueueReadBufferRect_fn +inline cl_int clEnqueueReadBufferRect(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, void* p10, cl_uint p11, const cl_event* p12, cl_event* p13) { return clEnqueueReadBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); } +#undef clEnqueueReadImage +#define clEnqueueReadImage clEnqueueReadImage_fn +inline cl_int clEnqueueReadImage(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, size_t p5, size_t p6, void* p7, cl_uint p8, const cl_event* p9, cl_event* p10) { return clEnqueueReadImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10); } +#undef clEnqueueTask +#define clEnqueueTask clEnqueueTask_fn +inline cl_int clEnqueueTask(cl_command_queue p0, cl_kernel p1, cl_uint p2, const cl_event* p3, cl_event* p4) { return clEnqueueTask_pfn(p0, p1, p2, p3, p4); } +#undef clEnqueueUnmapMemObject +#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_fn +inline cl_int clEnqueueUnmapMemObject(cl_command_queue p0, cl_mem p1, void* p2, cl_uint p3, const cl_event* p4, cl_event* p5) { return clEnqueueUnmapMemObject_pfn(p0, p1, p2, p3, p4, p5); } +#undef clEnqueueWaitForEvents +#define clEnqueueWaitForEvents clEnqueueWaitForEvents_fn +inline cl_int clEnqueueWaitForEvents(cl_command_queue p0, cl_uint p1, const cl_event* p2) { return clEnqueueWaitForEvents_pfn(p0, p1, p2); } +#undef clEnqueueWriteBuffer +#define clEnqueueWriteBuffer clEnqueueWriteBuffer_fn +inline cl_int clEnqueueWriteBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, size_t p3, size_t p4, const void* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueWriteBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueWriteBufferRect +#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_fn +inline cl_int clEnqueueWriteBufferRect(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, const void* p10, cl_uint p11, const cl_event* p12, cl_event* p13) { return clEnqueueWriteBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); } +#undef clEnqueueWriteImage +#define clEnqueueWriteImage clEnqueueWriteImage_fn +inline cl_int clEnqueueWriteImage(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, size_t p5, size_t p6, const void* p7, cl_uint p8, const cl_event* p9, cl_event* p10) { return clEnqueueWriteImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10); } +#undef clFinish +#define clFinish clFinish_fn +inline cl_int clFinish(cl_command_queue p0) { return clFinish_pfn(p0); } +#undef clFlush +#define clFlush clFlush_fn +inline cl_int clFlush(cl_command_queue p0) { return clFlush_pfn(p0); } +#undef clGetCommandQueueInfo +#define clGetCommandQueueInfo clGetCommandQueueInfo_fn +inline cl_int clGetCommandQueueInfo(cl_command_queue p0, cl_command_queue_info p1, size_t p2, void* p3, size_t* p4) { return clGetCommandQueueInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetContextInfo +#define clGetContextInfo clGetContextInfo_fn +inline cl_int clGetContextInfo(cl_context p0, cl_context_info p1, size_t p2, void* p3, size_t* p4) { return clGetContextInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetDeviceIDs +#define clGetDeviceIDs clGetDeviceIDs_fn +inline cl_int clGetDeviceIDs(cl_platform_id p0, cl_device_type p1, cl_uint p2, cl_device_id* p3, cl_uint* p4) { return clGetDeviceIDs_pfn(p0, p1, p2, p3, p4); } +#undef clGetDeviceInfo +#define clGetDeviceInfo clGetDeviceInfo_fn +inline cl_int clGetDeviceInfo(cl_device_id p0, cl_device_info p1, size_t p2, void* p3, size_t* p4) { return clGetDeviceInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetEventInfo +#define clGetEventInfo clGetEventInfo_fn +inline cl_int clGetEventInfo(cl_event p0, cl_event_info p1, size_t p2, void* p3, size_t* p4) { return clGetEventInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetEventProfilingInfo +#define clGetEventProfilingInfo clGetEventProfilingInfo_fn +inline cl_int clGetEventProfilingInfo(cl_event p0, cl_profiling_info p1, size_t p2, void* p3, size_t* p4) { return clGetEventProfilingInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetExtensionFunctionAddress +#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_fn +inline void* clGetExtensionFunctionAddress(const char* p0) { return clGetExtensionFunctionAddress_pfn(p0); } +#undef clGetExtensionFunctionAddressForPlatform +#define clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform_fn +inline void* clGetExtensionFunctionAddressForPlatform(cl_platform_id p0, const char* p1) { return clGetExtensionFunctionAddressForPlatform_pfn(p0, p1); } +#undef clGetImageInfo +#define clGetImageInfo clGetImageInfo_fn +inline cl_int clGetImageInfo(cl_mem p0, cl_image_info p1, size_t p2, void* p3, size_t* p4) { return clGetImageInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetKernelArgInfo +#define clGetKernelArgInfo clGetKernelArgInfo_fn +inline cl_int clGetKernelArgInfo(cl_kernel p0, cl_uint p1, cl_kernel_arg_info p2, size_t p3, void* p4, size_t* p5) { return clGetKernelArgInfo_pfn(p0, p1, p2, p3, p4, p5); } +#undef clGetKernelInfo +#define clGetKernelInfo clGetKernelInfo_fn +inline cl_int clGetKernelInfo(cl_kernel p0, cl_kernel_info p1, size_t p2, void* p3, size_t* p4) { return clGetKernelInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetKernelWorkGroupInfo +#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_fn +inline cl_int clGetKernelWorkGroupInfo(cl_kernel p0, cl_device_id p1, cl_kernel_work_group_info p2, size_t p3, void* p4, size_t* p5) { return clGetKernelWorkGroupInfo_pfn(p0, p1, p2, p3, p4, p5); } +#undef clGetMemObjectInfo +#define clGetMemObjectInfo clGetMemObjectInfo_fn +inline cl_int clGetMemObjectInfo(cl_mem p0, cl_mem_info p1, size_t p2, void* p3, size_t* p4) { return clGetMemObjectInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetPlatformIDs +#define clGetPlatformIDs clGetPlatformIDs_fn +inline cl_int clGetPlatformIDs(cl_uint p0, cl_platform_id* p1, cl_uint* p2) { return clGetPlatformIDs_pfn(p0, p1, p2); } +#undef clGetPlatformInfo +#define clGetPlatformInfo clGetPlatformInfo_fn +inline cl_int clGetPlatformInfo(cl_platform_id p0, cl_platform_info p1, size_t p2, void* p3, size_t* p4) { return clGetPlatformInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetProgramBuildInfo +#define clGetProgramBuildInfo clGetProgramBuildInfo_fn +inline cl_int clGetProgramBuildInfo(cl_program p0, cl_device_id p1, cl_program_build_info p2, size_t p3, void* p4, size_t* p5) { return clGetProgramBuildInfo_pfn(p0, p1, p2, p3, p4, p5); } +#undef clGetProgramInfo +#define clGetProgramInfo clGetProgramInfo_fn +inline cl_int clGetProgramInfo(cl_program p0, cl_program_info p1, size_t p2, void* p3, size_t* p4) { return clGetProgramInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetSamplerInfo +#define clGetSamplerInfo clGetSamplerInfo_fn +inline cl_int clGetSamplerInfo(cl_sampler p0, cl_sampler_info p1, size_t p2, void* p3, size_t* p4) { return clGetSamplerInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetSupportedImageFormats +#define clGetSupportedImageFormats clGetSupportedImageFormats_fn +inline cl_int clGetSupportedImageFormats(cl_context p0, cl_mem_flags p1, cl_mem_object_type p2, cl_uint p3, cl_image_format* p4, cl_uint* p5) { return clGetSupportedImageFormats_pfn(p0, p1, p2, p3, p4, p5); } +#undef clLinkProgram +#define clLinkProgram clLinkProgram_fn +inline cl_program clLinkProgram(cl_context p0, cl_uint p1, const cl_device_id* p2, const char* p3, cl_uint p4, const cl_program* p5, void (CL_CALLBACK*p6) (cl_program, void*), void* p7, cl_int* p8) { return clLinkProgram_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clReleaseCommandQueue +#define clReleaseCommandQueue clReleaseCommandQueue_fn +inline cl_int clReleaseCommandQueue(cl_command_queue p0) { return clReleaseCommandQueue_pfn(p0); } +#undef clReleaseContext +#define clReleaseContext clReleaseContext_fn +inline cl_int clReleaseContext(cl_context p0) { return clReleaseContext_pfn(p0); } +#undef clReleaseDevice +#define clReleaseDevice clReleaseDevice_fn +inline cl_int clReleaseDevice(cl_device_id p0) { return clReleaseDevice_pfn(p0); } +#undef clReleaseEvent +#define clReleaseEvent clReleaseEvent_fn +inline cl_int clReleaseEvent(cl_event p0) { return clReleaseEvent_pfn(p0); } +#undef clReleaseKernel +#define clReleaseKernel clReleaseKernel_fn +inline cl_int clReleaseKernel(cl_kernel p0) { return clReleaseKernel_pfn(p0); } +#undef clReleaseMemObject +#define clReleaseMemObject clReleaseMemObject_fn +inline cl_int clReleaseMemObject(cl_mem p0) { return clReleaseMemObject_pfn(p0); } +#undef clReleaseProgram +#define clReleaseProgram clReleaseProgram_fn +inline cl_int clReleaseProgram(cl_program p0) { return clReleaseProgram_pfn(p0); } +#undef clReleaseSampler +#define clReleaseSampler clReleaseSampler_fn +inline cl_int clReleaseSampler(cl_sampler p0) { return clReleaseSampler_pfn(p0); } +#undef clRetainCommandQueue +#define clRetainCommandQueue clRetainCommandQueue_fn +inline cl_int clRetainCommandQueue(cl_command_queue p0) { return clRetainCommandQueue_pfn(p0); } +#undef clRetainContext +#define clRetainContext clRetainContext_fn +inline cl_int clRetainContext(cl_context p0) { return clRetainContext_pfn(p0); } +#undef clRetainDevice +#define clRetainDevice clRetainDevice_fn +inline cl_int clRetainDevice(cl_device_id p0) { return clRetainDevice_pfn(p0); } +#undef clRetainEvent +#define clRetainEvent clRetainEvent_fn +inline cl_int clRetainEvent(cl_event p0) { return clRetainEvent_pfn(p0); } +#undef clRetainKernel +#define clRetainKernel clRetainKernel_fn +inline cl_int clRetainKernel(cl_kernel p0) { return clRetainKernel_pfn(p0); } +#undef clRetainMemObject +#define clRetainMemObject clRetainMemObject_fn +inline cl_int clRetainMemObject(cl_mem p0) { return clRetainMemObject_pfn(p0); } +#undef clRetainProgram +#define clRetainProgram clRetainProgram_fn +inline cl_int clRetainProgram(cl_program p0) { return clRetainProgram_pfn(p0); } +#undef clRetainSampler +#define clRetainSampler clRetainSampler_fn +inline cl_int clRetainSampler(cl_sampler p0) { return clRetainSampler_pfn(p0); } +#undef clSetEventCallback +#define clSetEventCallback clSetEventCallback_fn +inline cl_int clSetEventCallback(cl_event p0, cl_int p1, void (CL_CALLBACK*p2) (cl_event, cl_int, void*), void* p3) { return clSetEventCallback_pfn(p0, p1, p2, p3); } +#undef clSetKernelArg +#define clSetKernelArg clSetKernelArg_fn +inline cl_int clSetKernelArg(cl_kernel p0, cl_uint p1, size_t p2, const void* p3) { return clSetKernelArg_pfn(p0, p1, p2, p3); } +#undef clSetMemObjectDestructorCallback +#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_fn +inline cl_int clSetMemObjectDestructorCallback(cl_mem p0, void (CL_CALLBACK*p1) (cl_mem, void*), void* p2) { return clSetMemObjectDestructorCallback_pfn(p0, p1, p2); } +#undef clSetUserEventStatus +#define clSetUserEventStatus clSetUserEventStatus_fn +inline cl_int clSetUserEventStatus(cl_event p0, cl_int p1) { return clSetUserEventStatus_pfn(p0, p1); } +#undef clUnloadCompiler +#define clUnloadCompiler clUnloadCompiler_fn +inline cl_int clUnloadCompiler() { return clUnloadCompiler_pfn(); } +#undef clUnloadPlatformCompiler +#define clUnloadPlatformCompiler clUnloadPlatformCompiler_fn +inline cl_int clUnloadPlatformCompiler(cl_platform_id p0) { return clUnloadPlatformCompiler_pfn(p0); } +#undef clWaitForEvents +#define clWaitForEvents clWaitForEvents_fn +inline cl_int clWaitForEvents(cl_uint p0, const cl_event* p1) { return clWaitForEvents_pfn(p0, p1); } diff --git a/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_gl.hpp b/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_gl.hpp new file mode 100644 index 0000000..0b12aed --- /dev/null +++ b/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_gl.hpp @@ -0,0 +1,62 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_HPP +#error "Invalid usage" +#endif + +// generated by parser_cl.py +#define clCreateFromGLBuffer clCreateFromGLBuffer_ +#define clCreateFromGLRenderbuffer clCreateFromGLRenderbuffer_ +#define clCreateFromGLTexture clCreateFromGLTexture_ +#define clCreateFromGLTexture2D clCreateFromGLTexture2D_ +#define clCreateFromGLTexture3D clCreateFromGLTexture3D_ +#define clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects_ +#define clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects_ +#define clGetGLContextInfoKHR clGetGLContextInfoKHR_ +#define clGetGLObjectInfo clGetGLObjectInfo_ +#define clGetGLTextureInfo clGetGLTextureInfo_ + +#if defined __APPLE__ +#include +#else +#include +#endif + +// generated by parser_cl.py +#undef clCreateFromGLBuffer +#define clCreateFromGLBuffer clCreateFromGLBuffer_pfn +#undef clCreateFromGLRenderbuffer +#define clCreateFromGLRenderbuffer clCreateFromGLRenderbuffer_pfn +#undef clCreateFromGLTexture +#define clCreateFromGLTexture clCreateFromGLTexture_pfn +#undef clCreateFromGLTexture2D +#define clCreateFromGLTexture2D clCreateFromGLTexture2D_pfn +#undef clCreateFromGLTexture3D +#define clCreateFromGLTexture3D clCreateFromGLTexture3D_pfn +#undef clEnqueueAcquireGLObjects +#define clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects_pfn +#undef clEnqueueReleaseGLObjects +#define clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects_pfn +#undef clGetGLContextInfoKHR +#define clGetGLContextInfoKHR clGetGLContextInfoKHR_pfn +#undef clGetGLObjectInfo +#define clGetGLObjectInfo clGetGLObjectInfo_pfn +#undef clGetGLTextureInfo +#define clGetGLTextureInfo clGetGLTextureInfo_pfn + +#ifdef cl_khr_gl_sharing + +// generated by parser_cl.py +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLBuffer)(cl_context, cl_mem_flags, cl_GLuint, int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLRenderbuffer)(cl_context, cl_mem_flags, cl_GLuint, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLTexture)(cl_context, cl_mem_flags, cl_GLenum, cl_GLint, cl_GLuint, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLTexture2D)(cl_context, cl_mem_flags, cl_GLenum, cl_GLint, cl_GLuint, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLTexture3D)(cl_context, cl_mem_flags, cl_GLenum, cl_GLint, cl_GLuint, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueAcquireGLObjects)(cl_command_queue, cl_uint, const cl_mem*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReleaseGLObjects)(cl_command_queue, cl_uint, const cl_mem*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetGLContextInfoKHR)(const cl_context_properties*, cl_gl_context_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetGLObjectInfo)(cl_mem, cl_gl_object_type*, cl_GLuint*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetGLTextureInfo)(cl_mem, cl_gl_texture_info, size_t, void*, size_t*); + +#endif // cl_khr_gl_sharing diff --git a/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_gl_wrappers.hpp b/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_gl_wrappers.hpp new file mode 100644 index 0000000..12f342b --- /dev/null +++ b/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_gl_wrappers.hpp @@ -0,0 +1,42 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_WRAPPERS_HPP +#error "Invalid usage" +#endif + +#ifdef cl_khr_gl_sharing + +// generated by parser_cl.py +#undef clCreateFromGLBuffer +#define clCreateFromGLBuffer clCreateFromGLBuffer_fn +inline cl_mem clCreateFromGLBuffer(cl_context p0, cl_mem_flags p1, cl_GLuint p2, int* p3) { return clCreateFromGLBuffer_pfn(p0, p1, p2, p3); } +#undef clCreateFromGLRenderbuffer +#define clCreateFromGLRenderbuffer clCreateFromGLRenderbuffer_fn +inline cl_mem clCreateFromGLRenderbuffer(cl_context p0, cl_mem_flags p1, cl_GLuint p2, cl_int* p3) { return clCreateFromGLRenderbuffer_pfn(p0, p1, p2, p3); } +#undef clCreateFromGLTexture +#define clCreateFromGLTexture clCreateFromGLTexture_fn +inline cl_mem clCreateFromGLTexture(cl_context p0, cl_mem_flags p1, cl_GLenum p2, cl_GLint p3, cl_GLuint p4, cl_int* p5) { return clCreateFromGLTexture_pfn(p0, p1, p2, p3, p4, p5); } +#undef clCreateFromGLTexture2D +#define clCreateFromGLTexture2D clCreateFromGLTexture2D_fn +inline cl_mem clCreateFromGLTexture2D(cl_context p0, cl_mem_flags p1, cl_GLenum p2, cl_GLint p3, cl_GLuint p4, cl_int* p5) { return clCreateFromGLTexture2D_pfn(p0, p1, p2, p3, p4, p5); } +#undef clCreateFromGLTexture3D +#define clCreateFromGLTexture3D clCreateFromGLTexture3D_fn +inline cl_mem clCreateFromGLTexture3D(cl_context p0, cl_mem_flags p1, cl_GLenum p2, cl_GLint p3, cl_GLuint p4, cl_int* p5) { return clCreateFromGLTexture3D_pfn(p0, p1, p2, p3, p4, p5); } +#undef clEnqueueAcquireGLObjects +#define clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects_fn +inline cl_int clEnqueueAcquireGLObjects(cl_command_queue p0, cl_uint p1, const cl_mem* p2, cl_uint p3, const cl_event* p4, cl_event* p5) { return clEnqueueAcquireGLObjects_pfn(p0, p1, p2, p3, p4, p5); } +#undef clEnqueueReleaseGLObjects +#define clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects_fn +inline cl_int clEnqueueReleaseGLObjects(cl_command_queue p0, cl_uint p1, const cl_mem* p2, cl_uint p3, const cl_event* p4, cl_event* p5) { return clEnqueueReleaseGLObjects_pfn(p0, p1, p2, p3, p4, p5); } +#undef clGetGLContextInfoKHR +#define clGetGLContextInfoKHR clGetGLContextInfoKHR_fn +inline cl_int clGetGLContextInfoKHR(const cl_context_properties* p0, cl_gl_context_info p1, size_t p2, void* p3, size_t* p4) { return clGetGLContextInfoKHR_pfn(p0, p1, p2, p3, p4); } +#undef clGetGLObjectInfo +#define clGetGLObjectInfo clGetGLObjectInfo_fn +inline cl_int clGetGLObjectInfo(cl_mem p0, cl_gl_object_type* p1, cl_GLuint* p2) { return clGetGLObjectInfo_pfn(p0, p1, p2); } +#undef clGetGLTextureInfo +#define clGetGLTextureInfo clGetGLTextureInfo_fn +inline cl_int clGetGLTextureInfo(cl_mem p0, cl_gl_texture_info p1, size_t p2, void* p3, size_t* p4) { return clGetGLTextureInfo_pfn(p0, p1, p2, p3, p4); } + +#endif // cl_khr_gl_sharing diff --git a/IPL/include/opencv/opencv/cxcore.h b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_clamdblas.hpp similarity index 80% rename from IPL/include/opencv/opencv/cxcore.h rename to IPL/include/opencv/opencv2/core/opencl/runtime/opencl_clamdblas.hpp index 0982bd7..2ad8ac0 100644 --- a/IPL/include/opencv/opencv/cxcore.h +++ b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_clamdblas.hpp @@ -10,8 +10,7 @@ // License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, @@ -30,7 +29,7 @@ // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, +// In no event shall the OpenCV Foundation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused @@ -40,13 +39,15 @@ // //M*/ -#ifndef __OPENCV_OLD_CXCORE_H__ -#define __OPENCV_OLD_CXCORE_H__ +#ifndef OPENCV_CORE_OCL_RUNTIME_CLAMDBLAS_HPP +#define OPENCV_CORE_OCL_RUNTIME_CLAMDBLAS_HPP -//#if defined(__GNUC__) -//#warning "This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module" -//#endif +#ifdef HAVE_CLAMDBLAS -#include "opencv2/core/core_c.h" +#include "opencl_core.hpp" -#endif +#include "autogenerated/opencl_clamdblas.hpp" + +#endif // HAVE_CLAMDBLAS + +#endif // OPENCV_CORE_OCL_RUNTIME_CLAMDBLAS_HPP diff --git a/IPL/include/opencv/opencv/cxcore.hpp b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_clamdfft.hpp similarity index 80% rename from IPL/include/opencv/opencv/cxcore.hpp rename to IPL/include/opencv/opencv2/core/opencl/runtime/opencl_clamdfft.hpp index 9af4ac7..a328f72 100644 --- a/IPL/include/opencv/opencv/cxcore.hpp +++ b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_clamdfft.hpp @@ -10,8 +10,7 @@ // License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, @@ -30,7 +29,7 @@ // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, +// In no event shall the OpenCV Foundation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused @@ -40,14 +39,15 @@ // //M*/ -#ifndef __OPENCV_OLD_CXCORE_HPP__ -#define __OPENCV_OLD_CXCORE_HPP__ +#ifndef OPENCV_CORE_OCL_RUNTIME_CLAMDFFT_HPP +#define OPENCV_CORE_OCL_RUNTIME_CLAMDFFT_HPP -//#if defined(__GNUC__) -//#warning "This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module" -//#endif +#ifdef HAVE_CLAMDFFT -#include "cxcore.h" -#include "opencv2/core.hpp" +#include "opencl_core.hpp" -#endif +#include "autogenerated/opencl_clamdfft.hpp" + +#endif // HAVE_CLAMDFFT + +#endif // OPENCV_CORE_OCL_RUNTIME_CLAMDFFT_HPP diff --git a/IPL/include/opencv/opencv2/photo/photo_c.h b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_core.hpp similarity index 57% rename from IPL/include/opencv/opencv2/photo/photo_c.h rename to IPL/include/opencv/opencv2/core/opencl/runtime/opencl_core.hpp index 07ca9b3..0404b31 100644 --- a/IPL/include/opencv/opencv2/photo/photo_c.h +++ b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_core.hpp @@ -10,8 +10,7 @@ // License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2008-2012, Willow Garage Inc., all rights reserved. +// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, @@ -30,7 +29,7 @@ // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, +// In no event shall the OpenCV Foundation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused @@ -40,35 +39,46 @@ // //M*/ -#ifndef __OPENCV_PHOTO_C_H__ -#define __OPENCV_PHOTO_C_H__ +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_CORE_HPP +#define OPENCV_CORE_OCL_RUNTIME_OPENCL_CORE_HPP -#include "opencv2/core/core_c.h" +#ifdef HAVE_OPENCL -#ifdef __cplusplus -extern "C" { +#ifndef CL_RUNTIME_EXPORT +#if (defined(BUILD_SHARED_LIBS) || defined(OPENCV_CORE_SHARED)) && (defined _WIN32 || defined WINCE) && \ + !(defined(__OPENCV_BUILD) && defined(OPENCV_MODULE_IS_PART_OF_WORLD)) +#define CL_RUNTIME_EXPORT __declspec(dllimport) +#else +#define CL_RUNTIME_EXPORT +#endif #endif -/** @addtogroup photo_c - @{ - */ - -/* Inpainting algorithms */ -enum InpaintingModes -{ - CV_INPAINT_NS =0, - CV_INPAINT_TELEA =1 -}; +#ifdef HAVE_OPENCL_SVM +#define clSVMAlloc clSVMAlloc_ +#define clSVMFree clSVMFree_ +#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_ +#define clSetKernelExecInfo clSetKernelExecInfo_ +#define clEnqueueSVMFree clEnqueueSVMFree_ +#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_ +#define clEnqueueSVMMemFill clEnqueueSVMMemFill_ +#define clEnqueueSVMMap clEnqueueSVMMap_ +#define clEnqueueSVMUnmap clEnqueueSVMUnmap_ +#endif +#include "autogenerated/opencl_core.hpp" -/* Inpaints the selected region in the image */ -CVAPI(void) cvInpaint( const CvArr* src, const CvArr* inpaint_mask, - CvArr* dst, double inpaintRange, int flags ); +#ifndef CL_DEVICE_DOUBLE_FP_CONFIG +#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 +#endif -/** @} */ +#ifndef CL_DEVICE_HALF_FP_CONFIG +#define CL_DEVICE_HALF_FP_CONFIG 0x1033 +#endif -#ifdef __cplusplus -} //extern "C" +#ifndef CL_VERSION_1_2 +#define CV_REQUIRE_OPENCL_1_2_ERROR CV_Error(cv::Error::OpenCLApiCallError, "OpenCV compiled without OpenCL v1.2 support, so we can't use functionality from OpenCL v1.2") #endif -#endif //__OPENCV_PHOTO_C_H__ +#endif // HAVE_OPENCL + +#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_CORE_HPP diff --git a/IPL/include/opencv/opencv/cxeigen.hpp b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_core_wrappers.hpp similarity index 82% rename from IPL/include/opencv/opencv/cxeigen.hpp rename to IPL/include/opencv/opencv2/core/opencl/runtime/opencl_core_wrappers.hpp index 1f04d1a..38fcae9 100644 --- a/IPL/include/opencv/opencv/cxeigen.hpp +++ b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_core_wrappers.hpp @@ -7,11 +7,10 @@ // copy or use the software. // // -// License Agreement +// License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, @@ -30,7 +29,7 @@ // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, +// In no event shall the OpenCV Foundation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused @@ -40,9 +39,9 @@ // //M*/ -#ifndef __OPENCV_OLD_EIGEN_HPP__ -#define __OPENCV_OLD_EIGEN_HPP__ +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_WRAPPERS_HPP +#define OPENCV_CORE_OCL_RUNTIME_OPENCL_WRAPPERS_HPP -#include "opencv2/core/eigen.hpp" +#include "autogenerated/opencl_core_wrappers.hpp" -#endif +#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_WRAPPERS_HPP diff --git a/IPL/include/opencv/opencv/cvaux.hpp b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_gl.hpp similarity index 75% rename from IPL/include/opencv/opencv/cvaux.hpp rename to IPL/include/opencv/opencv2/core/opencl/runtime/opencl_gl.hpp index b0e60a3..659c7d8 100644 --- a/IPL/include/opencv/opencv/cvaux.hpp +++ b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_gl.hpp @@ -7,10 +7,10 @@ // copy or use the software. // // -// Intel License Agreement +// License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, @@ -23,13 +23,13 @@ // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // -// * The name of Intel Corporation may not be used to endorse or promote products +// * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, +// In no event shall the OpenCV Foundation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused @@ -39,14 +39,15 @@ // //M*/ -#ifndef __OPENCV_OLD_AUX_HPP__ -#define __OPENCV_OLD_AUX_HPP__ +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_HPP +#define OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_HPP -//#if defined(__GNUC__) -//#warning "This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module" -//#endif +#if defined HAVE_OPENCL && defined HAVE_OPENGL -#include "cvaux.h" -#include "opencv2/core/utility.hpp" +#include "opencl_core.hpp" -#endif +#include "autogenerated/opencl_gl.hpp" + +#endif // defined HAVE_OPENCL && defined HAVE_OPENGL + +#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_HPP diff --git a/IPL/include/opencv/opencv/highgui.h b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_gl_wrappers.hpp similarity index 78% rename from IPL/include/opencv/opencv/highgui.h rename to IPL/include/opencv/opencv2/core/opencl/runtime/opencl_gl_wrappers.hpp index 0261029..9700004 100644 --- a/IPL/include/opencv/opencv/highgui.h +++ b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_gl_wrappers.hpp @@ -7,10 +7,10 @@ // copy or use the software. // // -// Intel License Agreement +// License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, @@ -23,13 +23,13 @@ // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // -// * The name of Intel Corporation may not be used to endorse or promote products +// * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, +// In no event shall the OpenCV Foundation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused @@ -39,10 +39,9 @@ // //M*/ -#ifndef __OPENCV_OLD_HIGHGUI_H__ -#define __OPENCV_OLD_HIGHGUI_H__ +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_WRAPPERS_HPP +#define OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_WRAPPERS_HPP -#include "opencv2/core/core_c.h" -#include "opencv2/highgui/highgui_c.h" +#include "autogenerated/opencl_gl_wrappers.hpp" -#endif +#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_WRAPPERS_HPP diff --git a/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_svm_20.hpp b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_svm_20.hpp new file mode 100644 index 0000000..9636b19 --- /dev/null +++ b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_svm_20.hpp @@ -0,0 +1,48 @@ +/* See LICENSE file in the root OpenCV directory */ + +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP +#define OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP + +#if defined(HAVE_OPENCL_SVM) +#include "opencl_core.hpp" + +#include "opencl_svm_definitions.hpp" + +#undef clSVMAlloc +#define clSVMAlloc clSVMAlloc_pfn +#undef clSVMFree +#define clSVMFree clSVMFree_pfn +#undef clSetKernelArgSVMPointer +#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_pfn +#undef clSetKernelExecInfo +//#define clSetKernelExecInfo clSetKernelExecInfo_pfn +#undef clEnqueueSVMFree +//#define clEnqueueSVMFree clEnqueueSVMFree_pfn +#undef clEnqueueSVMMemcpy +#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_pfn +#undef clEnqueueSVMMemFill +#define clEnqueueSVMMemFill clEnqueueSVMMemFill_pfn +#undef clEnqueueSVMMap +#define clEnqueueSVMMap clEnqueueSVMMap_pfn +#undef clEnqueueSVMUnmap +#define clEnqueueSVMUnmap clEnqueueSVMUnmap_pfn + +extern CL_RUNTIME_EXPORT void* (CL_API_CALL *clSVMAlloc)(cl_context context, cl_svm_mem_flags flags, size_t size, unsigned int alignment); +extern CL_RUNTIME_EXPORT void (CL_API_CALL *clSVMFree)(cl_context context, void* svm_pointer); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clSetKernelArgSVMPointer)(cl_kernel kernel, cl_uint arg_index, const void* arg_value); +//extern CL_RUNTIME_EXPORT void* (CL_API_CALL *clSetKernelExecInfo)(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void* param_value); +//extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMFree)(cl_command_queue command_queue, cl_uint num_svm_pointers, void* svm_pointers[], +// void (CL_CALLBACK *pfn_free_func)(cl_command_queue queue, cl_uint num_svm_pointers, void* svm_pointers[], void* user_data), void* user_data, +// cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMemcpy)(cl_command_queue command_queue, cl_bool blocking_copy, void* dst_ptr, const void* src_ptr, size_t size, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMemFill)(cl_command_queue command_queue, void* svm_ptr, const void* pattern, size_t pattern_size, size_t size, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMap)(cl_command_queue command_queue, cl_bool blocking_map, cl_map_flags map_flags, void* svm_ptr, size_t size, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMUnmap)(cl_command_queue command_queue, void* svm_ptr, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); + +#endif // HAVE_OPENCL_SVM + +#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP diff --git a/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_svm_definitions.hpp b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_svm_definitions.hpp new file mode 100644 index 0000000..97c927b --- /dev/null +++ b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_svm_definitions.hpp @@ -0,0 +1,42 @@ +/* See LICENSE file in the root OpenCV directory */ + +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP +#define OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP + +#if defined(HAVE_OPENCL_SVM) +#if defined(CL_VERSION_2_0) + +// OpenCL 2.0 contains SVM definitions + +#else + +typedef cl_bitfield cl_device_svm_capabilities; +typedef cl_bitfield cl_svm_mem_flags; +typedef cl_uint cl_kernel_exec_info; + +// +// TODO Add real values after OpenCL 2.0 release +// + +#ifndef CL_DEVICE_SVM_CAPABILITIES +#define CL_DEVICE_SVM_CAPABILITIES 0x1053 + +#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0) +#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1) +#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2) +#define CL_DEVICE_SVM_ATOMICS (1 << 3) +#endif + +#ifndef CL_MEM_SVM_FINE_GRAIN_BUFFER +#define CL_MEM_SVM_FINE_GRAIN_BUFFER (1 << 10) +#endif + +#ifndef CL_MEM_SVM_ATOMICS +#define CL_MEM_SVM_ATOMICS (1 << 11) +#endif + + +#endif // CL_VERSION_2_0 +#endif // HAVE_OPENCL_SVM + +#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP diff --git a/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp new file mode 100644 index 0000000..497bc3d --- /dev/null +++ b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp @@ -0,0 +1,166 @@ +/* See LICENSE file in the root OpenCV directory */ + +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP +#define OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP + +#if defined(HAVE_OPENCL_SVM) +#include "opencl_core.hpp" + +#ifndef CL_DEVICE_SVM_CAPABILITIES_AMD +// +// Part of the file is an extract from the cl_ext.h file from AMD APP SDK package. +// Below is the original copyright. +// +/******************************************************************************* + * Copyright (c) 2008-2013 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +/******************************************* + * Shared Virtual Memory (SVM) extension + *******************************************/ +typedef cl_bitfield cl_device_svm_capabilities_amd; +typedef cl_bitfield cl_svm_mem_flags_amd; +typedef cl_uint cl_kernel_exec_info_amd; + +/* cl_device_info */ +#define CL_DEVICE_SVM_CAPABILITIES_AMD 0x1053 +#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT_AMD 0x1054 + +/* cl_device_svm_capabilities_amd */ +#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD (1 << 0) +#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_AMD (1 << 1) +#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_AMD (1 << 2) +#define CL_DEVICE_SVM_ATOMICS_AMD (1 << 3) + +/* cl_svm_mem_flags_amd */ +#define CL_MEM_SVM_FINE_GRAIN_BUFFER_AMD (1 << 10) +#define CL_MEM_SVM_ATOMICS_AMD (1 << 11) + +/* cl_mem_info */ +#define CL_MEM_USES_SVM_POINTER_AMD 0x1109 + +/* cl_kernel_exec_info_amd */ +#define CL_KERNEL_EXEC_INFO_SVM_PTRS_AMD 0x11B6 +#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_AMD 0x11B7 + +/* cl_command_type */ +#define CL_COMMAND_SVM_FREE_AMD 0x1209 +#define CL_COMMAND_SVM_MEMCPY_AMD 0x120A +#define CL_COMMAND_SVM_MEMFILL_AMD 0x120B +#define CL_COMMAND_SVM_MAP_AMD 0x120C +#define CL_COMMAND_SVM_UNMAP_AMD 0x120D + +typedef CL_API_ENTRY void* +(CL_API_CALL * clSVMAllocAMD_fn)( + cl_context /* context */, + cl_svm_mem_flags_amd /* flags */, + size_t /* size */, + unsigned int /* alignment */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY void +(CL_API_CALL * clSVMFreeAMD_fn)( + cl_context /* context */, + void* /* svm_pointer */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueSVMFreeAMD_fn)( + cl_command_queue /* command_queue */, + cl_uint /* num_svm_pointers */, + void** /* svm_pointers */, + void (CL_CALLBACK *)( /*pfn_free_func*/ + cl_command_queue /* queue */, + cl_uint /* num_svm_pointers */, + void** /* svm_pointers */, + void* /* user_data */), + void* /* user_data */, + cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, + cl_event* /* event */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueSVMMemcpyAMD_fn)( + cl_command_queue /* command_queue */, + cl_bool /* blocking_copy */, + void* /* dst_ptr */, + const void* /* src_ptr */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, + cl_event* /* event */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueSVMMemFillAMD_fn)( + cl_command_queue /* command_queue */, + void* /* svm_ptr */, + const void* /* pattern */, + size_t /* pattern_size */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, + cl_event* /* event */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueSVMMapAMD_fn)( + cl_command_queue /* command_queue */, + cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, + void* /* svm_ptr */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, + cl_event* /* event */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueSVMUnmapAMD_fn)( + cl_command_queue /* command_queue */, + void* /* svm_ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, + cl_event* /* event */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clSetKernelArgSVMPointerAMD_fn)( + cl_kernel /* kernel */, + cl_uint /* arg_index */, + const void * /* arg_value */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clSetKernelExecInfoAMD_fn)( + cl_kernel /* kernel */, + cl_kernel_exec_info_amd /* param_name */, + size_t /* param_value_size */, + const void * /* param_value */ +) CL_EXT_SUFFIX__VERSION_1_2; + +#endif + +#endif // HAVE_OPENCL_SVM + +#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP diff --git a/IPL/include/opencv/opencv2/core/opengl.hpp b/IPL/include/opencv/opencv2/core/opengl.hpp index fd47c52..a311ce2 100644 --- a/IPL/include/opencv/opencv2/core/opengl.hpp +++ b/IPL/include/opencv/opencv2/core/opengl.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CORE_OPENGL_HPP__ -#define __OPENCV_CORE_OPENGL_HPP__ +#ifndef OPENCV_CORE_OPENGL_HPP +#define OPENCV_CORE_OPENGL_HPP #ifndef __cplusplus # error opengl.hpp header must be compiled as C++ @@ -245,7 +245,7 @@ class CV_EXPORTS Buffer /** @brief Maps OpenGL buffer to CUDA device memory. - This operatation doesn't copy data. Several buffer objects can be mapped to CUDA memory at a time. + This operation doesn't copy data. Several buffer objects can be mapped to CUDA memory at a time. A mapped data store must be unmapped with ogl::Buffer::unmapDevice before its buffer object is used. */ @@ -548,7 +548,7 @@ calling unmapGLBuffer() function. @param accessFlags - data access flags (ACCESS_READ|ACCESS_WRITE). @return Returns UMat object */ -CV_EXPORTS UMat mapGLBuffer(const Buffer& buffer, int accessFlags = ACCESS_READ|ACCESS_WRITE); +CV_EXPORTS UMat mapGLBuffer(const Buffer& buffer, AccessFlag accessFlags = ACCESS_READ | ACCESS_WRITE); /** @brief Unmaps Buffer object (releases UMat, previously mapped from Buffer). @@ -558,13 +558,11 @@ by the call to mapGLBuffer() function. */ CV_EXPORTS void unmapGLBuffer(UMat& u); +//! @} }} // namespace cv::ogl namespace cv { namespace cuda { -//! @addtogroup cuda -//! @{ - /** @brief Sets a CUDA device and initializes it for the current thread with OpenGL interoperability. This function should be explicitly called after OpenGL context creation and before any CUDA calls. @@ -573,8 +571,6 @@ This function should be explicitly called after OpenGL context creation and befo */ CV_EXPORTS void setGlDevice(int device = 0); -//! @} - }} //! @cond IGNORED @@ -726,4 +722,4 @@ bool cv::ogl::Arrays::empty() const //! @endcond -#endif /* __OPENCV_CORE_OPENGL_HPP__ */ +#endif /* OPENCV_CORE_OPENGL_HPP */ diff --git a/IPL/include/opencv/opencv2/core/operations.hpp b/IPL/include/opencv/opencv2/core/operations.hpp index bced1a7..bde28c4 100644 --- a/IPL/include/opencv/opencv2/core/operations.hpp +++ b/IPL/include/opencv/opencv2/core/operations.hpp @@ -42,8 +42,8 @@ // //M*/ -#ifndef __OPENCV_CORE_OPERATIONS_HPP__ -#define __OPENCV_CORE_OPERATIONS_HPP__ +#ifndef OPENCV_CORE_OPERATIONS_HPP +#define OPENCV_CORE_OPERATIONS_HPP #ifndef __cplusplus # error operations.hpp header must be compiled as C++ @@ -51,6 +51,16 @@ #include +#if defined(__GNUC__) || defined(__clang__) // at least GCC 3.1+, clang 3.5+ +# if defined(__MINGW_PRINTF_FORMAT) // https://sourceforge.net/p/mingw-w64/wiki2/gnu%20printf/. +# define CV_FORMAT_PRINTF(string_idx, first_to_check) __attribute__ ((format (__MINGW_PRINTF_FORMAT, string_idx, first_to_check))) +# else +# define CV_FORMAT_PRINTF(string_idx, first_to_check) __attribute__ ((format (printf, string_idx, first_to_check))) +# endif +#else +# define CV_FORMAT_PRINTF(A, B) +#endif + //! @cond IGNORED namespace cv @@ -61,29 +71,44 @@ namespace cv namespace internal { -template struct Matx_FastInvOp +template struct Matx_FastInvOp +{ + bool operator()(const Matx<_Tp, m, n>& a, Matx<_Tp, n, m>& b, int method) const + { + return invert(a, b, method) != 0; + } +}; + +template struct Matx_FastInvOp<_Tp, m, m> { bool operator()(const Matx<_Tp, m, m>& a, Matx<_Tp, m, m>& b, int method) const { - Matx<_Tp, m, m> temp = a; + if (method == DECOMP_LU || method == DECOMP_CHOLESKY) + { + Matx<_Tp, m, m> temp = a; - // assume that b is all 0's on input => make it a unity matrix - for( int i = 0; i < m; i++ ) - b(i, i) = (_Tp)1; + // assume that b is all 0's on input => make it a unity matrix + for (int i = 0; i < m; i++) + b(i, i) = (_Tp)1; - if( method == DECOMP_CHOLESKY ) - return Cholesky(temp.val, m*sizeof(_Tp), m, b.val, m*sizeof(_Tp), m); + if (method == DECOMP_CHOLESKY) + return Cholesky(temp.val, m*sizeof(_Tp), m, b.val, m*sizeof(_Tp), m); - return LU(temp.val, m*sizeof(_Tp), m, b.val, m*sizeof(_Tp), m) != 0; + return LU(temp.val, m*sizeof(_Tp), m, b.val, m*sizeof(_Tp), m) != 0; + } + else + { + return invert(a, b, method) != 0; + } } }; -template struct Matx_FastInvOp<_Tp, 2> +template struct Matx_FastInvOp<_Tp, 2, 2> { - bool operator()(const Matx<_Tp, 2, 2>& a, Matx<_Tp, 2, 2>& b, int) const + bool operator()(const Matx<_Tp, 2, 2>& a, Matx<_Tp, 2, 2>& b, int /*method*/) const { - _Tp d = determinant(a); - if( d == 0 ) + _Tp d = (_Tp)determinant(a); + if (d == 0) return false; d = 1/d; b(1,1) = a(0,0)*d; @@ -94,12 +119,12 @@ template struct Matx_FastInvOp<_Tp, 2> } }; -template struct Matx_FastInvOp<_Tp, 3> +template struct Matx_FastInvOp<_Tp, 3, 3> { - bool operator()(const Matx<_Tp, 3, 3>& a, Matx<_Tp, 3, 3>& b, int) const + bool operator()(const Matx<_Tp, 3, 3>& a, Matx<_Tp, 3, 3>& b, int /*method*/) const { _Tp d = (_Tp)determinant(a); - if( d == 0 ) + if (d == 0) return false; d = 1/d; b(0,0) = (a(1,1) * a(2,2) - a(1,2) * a(2,1)) * d; @@ -118,27 +143,43 @@ template struct Matx_FastInvOp<_Tp, 3> }; -template struct Matx_FastSolveOp +template struct Matx_FastSolveOp +{ + bool operator()(const Matx<_Tp, m, l>& a, const Matx<_Tp, m, n>& b, + Matx<_Tp, l, n>& x, int method) const + { + return cv::solve(a, b, x, method); + } +}; + +template struct Matx_FastSolveOp<_Tp, m, m, n> { bool operator()(const Matx<_Tp, m, m>& a, const Matx<_Tp, m, n>& b, Matx<_Tp, m, n>& x, int method) const { - Matx<_Tp, m, m> temp = a; - x = b; - if( method == DECOMP_CHOLESKY ) - return Cholesky(temp.val, m*sizeof(_Tp), m, x.val, n*sizeof(_Tp), n); + if (method == DECOMP_LU || method == DECOMP_CHOLESKY) + { + Matx<_Tp, m, m> temp = a; + x = b; + if( method == DECOMP_CHOLESKY ) + return Cholesky(temp.val, m*sizeof(_Tp), m, x.val, n*sizeof(_Tp), n); - return LU(temp.val, m*sizeof(_Tp), m, x.val, n*sizeof(_Tp), n) != 0; + return LU(temp.val, m*sizeof(_Tp), m, x.val, n*sizeof(_Tp), n) != 0; + } + else + { + return cv::solve(a, b, x, method); + } } }; -template struct Matx_FastSolveOp<_Tp, 2, 1> +template struct Matx_FastSolveOp<_Tp, 2, 2, 1> { bool operator()(const Matx<_Tp, 2, 2>& a, const Matx<_Tp, 2, 1>& b, Matx<_Tp, 2, 1>& x, int) const { - _Tp d = determinant(a); - if( d == 0 ) + _Tp d = (_Tp)determinant(a); + if (d == 0) return false; d = 1/d; x(0) = (b(0)*a(1,1) - b(1)*a(0,1))*d; @@ -147,13 +188,13 @@ template struct Matx_FastSolveOp<_Tp, 2, 1> } }; -template struct Matx_FastSolveOp<_Tp, 3, 1> +template struct Matx_FastSolveOp<_Tp, 3, 3, 1> { bool operator()(const Matx<_Tp, 3, 3>& a, const Matx<_Tp, 3, 1>& b, Matx<_Tp, 3, 1>& x, int) const { _Tp d = (_Tp)determinant(a); - if( d == 0 ) + if (d == 0) return false; d = 1/d; x(0) = d*(b(0)*(a(1,1)*a(2,2) - a(1,2)*a(2,1)) - @@ -193,15 +234,8 @@ template inline Matx<_Tp, n, m> Matx<_Tp, m, n>::inv(int method, bool *p_is_ok /*= NULL*/) const { Matx<_Tp, n, m> b; - bool ok; - if( method == DECOMP_LU || method == DECOMP_CHOLESKY ) - ok = cv::internal::Matx_FastInvOp<_Tp, m>()(*this, b, method); - else - { - Mat A(*this, false), B(b, false); - ok = (invert(A, B, method) != 0); - } - if( NULL != p_is_ok ) { *p_is_ok = ok; } + bool ok = cv::internal::Matx_FastInvOp<_Tp, m, n>()(*this, b, method); + if (p_is_ok) *p_is_ok = ok; return ok ? b : Matx<_Tp, n, m>::zeros(); } @@ -209,15 +243,7 @@ template template inline Matx<_Tp, n, l> Matx<_Tp, m, n>::solve(const Matx<_Tp, m, l>& rhs, int method) const { Matx<_Tp, n, l> x; - bool ok; - if( method == DECOMP_LU || method == DECOMP_CHOLESKY ) - ok = cv::internal::Matx_FastSolveOp<_Tp, m, l>()(*this, rhs, x, method); - else - { - Mat A(*this, false), B(rhs, false), X(x, false); - ok = cv::solve(A, B, X, method); - } - + bool ok = cv::internal::Matx_FastSolveOp<_Tp, m, n, l>()(*this, rhs, x, method); return ok ? x : Matx<_Tp, n, l>::zeros(); } @@ -236,48 +262,67 @@ Matx<_Tp, n, l> Matx<_Tp, m, n>::solve(const Matx<_Tp, m, l>& rhs, int method) c template CV_MAT_AUG_OPERATOR1(op, cvop, A, B) \ template CV_MAT_AUG_OPERATOR1(op, cvop, const A, B) -CV_MAT_AUG_OPERATOR (+=, cv::add(a,b,a), Mat, Mat) -CV_MAT_AUG_OPERATOR (+=, cv::add(a,b,a), Mat, Scalar) -CV_MAT_AUG_OPERATOR_T(+=, cv::add(a,b,a), Mat_<_Tp>, Mat) -CV_MAT_AUG_OPERATOR_T(+=, cv::add(a,b,a), Mat_<_Tp>, Scalar) -CV_MAT_AUG_OPERATOR_T(+=, cv::add(a,b,a), Mat_<_Tp>, Mat_<_Tp>) - -CV_MAT_AUG_OPERATOR (-=, cv::subtract(a,b,a), Mat, Mat) -CV_MAT_AUG_OPERATOR (-=, cv::subtract(a,b,a), Mat, Scalar) -CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a,b,a), Mat_<_Tp>, Mat) -CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a,b,a), Mat_<_Tp>, Scalar) -CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a,b,a), Mat_<_Tp>, Mat_<_Tp>) +#define CV_MAT_AUG_OPERATOR_TN(op, cvop, A) \ + template static inline A& operator op (A& a, const Matx<_Tp,m,n>& b) { cvop; return a; } \ + template static inline const A& operator op (const A& a, const Matx<_Tp,m,n>& b) { cvop; return a; } + +CV_MAT_AUG_OPERATOR (+=, cv::add(a, b, (const Mat&)a), Mat, Mat) +CV_MAT_AUG_OPERATOR (+=, cv::add(a, b, (const Mat&)a), Mat, Scalar) +CV_MAT_AUG_OPERATOR_T(+=, cv::add(a, b, (const Mat&)a), Mat_<_Tp>, Mat) +CV_MAT_AUG_OPERATOR_T(+=, cv::add(a, b, (const Mat&)a), Mat_<_Tp>, Scalar) +CV_MAT_AUG_OPERATOR_T(+=, cv::add(a, b, (const Mat&)a), Mat_<_Tp>, Mat_<_Tp>) +CV_MAT_AUG_OPERATOR_TN(+=, cv::add(a, Mat(b), (const Mat&)a), Mat) +CV_MAT_AUG_OPERATOR_TN(+=, cv::add(a, Mat(b), (const Mat&)a), Mat_<_Tp>) + +CV_MAT_AUG_OPERATOR (-=, cv::subtract(a, b, (const Mat&)a), Mat, Mat) +CV_MAT_AUG_OPERATOR (-=, cv::subtract(a, b, (const Mat&)a), Mat, Scalar) +CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a, b, (const Mat&)a), Mat_<_Tp>, Mat) +CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a, b, (const Mat&)a), Mat_<_Tp>, Scalar) +CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a, b, (const Mat&)a), Mat_<_Tp>, Mat_<_Tp>) +CV_MAT_AUG_OPERATOR_TN(-=, cv::subtract(a, Mat(b), (const Mat&)a), Mat) +CV_MAT_AUG_OPERATOR_TN(-=, cv::subtract(a, Mat(b), (const Mat&)a), Mat_<_Tp>) CV_MAT_AUG_OPERATOR (*=, cv::gemm(a, b, 1, Mat(), 0, a, 0), Mat, Mat) CV_MAT_AUG_OPERATOR_T(*=, cv::gemm(a, b, 1, Mat(), 0, a, 0), Mat_<_Tp>, Mat) CV_MAT_AUG_OPERATOR_T(*=, cv::gemm(a, b, 1, Mat(), 0, a, 0), Mat_<_Tp>, Mat_<_Tp>) CV_MAT_AUG_OPERATOR (*=, a.convertTo(a, -1, b), Mat, double) CV_MAT_AUG_OPERATOR_T(*=, a.convertTo(a, -1, b), Mat_<_Tp>, double) +CV_MAT_AUG_OPERATOR_TN(*=, cv::gemm(a, Mat(b), 1, Mat(), 0, a, 0), Mat) +CV_MAT_AUG_OPERATOR_TN(*=, cv::gemm(a, Mat(b), 1, Mat(), 0, a, 0), Mat_<_Tp>) -CV_MAT_AUG_OPERATOR (/=, cv::divide(a,b,a), Mat, Mat) -CV_MAT_AUG_OPERATOR_T(/=, cv::divide(a,b,a), Mat_<_Tp>, Mat) -CV_MAT_AUG_OPERATOR_T(/=, cv::divide(a,b,a), Mat_<_Tp>, Mat_<_Tp>) +CV_MAT_AUG_OPERATOR (/=, cv::divide(a, b, (const Mat&)a), Mat, Mat) +CV_MAT_AUG_OPERATOR_T(/=, cv::divide(a, b, (const Mat&)a), Mat_<_Tp>, Mat) +CV_MAT_AUG_OPERATOR_T(/=, cv::divide(a, b, (const Mat&)a), Mat_<_Tp>, Mat_<_Tp>) CV_MAT_AUG_OPERATOR (/=, a.convertTo((Mat&)a, -1, 1./b), Mat, double) CV_MAT_AUG_OPERATOR_T(/=, a.convertTo((Mat&)a, -1, 1./b), Mat_<_Tp>, double) - -CV_MAT_AUG_OPERATOR (&=, cv::bitwise_and(a,b,a), Mat, Mat) -CV_MAT_AUG_OPERATOR (&=, cv::bitwise_and(a,b,a), Mat, Scalar) -CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a,b,a), Mat_<_Tp>, Mat) -CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a,b,a), Mat_<_Tp>, Scalar) -CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a,b,a), Mat_<_Tp>, Mat_<_Tp>) - -CV_MAT_AUG_OPERATOR (|=, cv::bitwise_or(a,b,a), Mat, Mat) -CV_MAT_AUG_OPERATOR (|=, cv::bitwise_or(a,b,a), Mat, Scalar) -CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a,b,a), Mat_<_Tp>, Mat) -CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a,b,a), Mat_<_Tp>, Scalar) -CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a,b,a), Mat_<_Tp>, Mat_<_Tp>) - -CV_MAT_AUG_OPERATOR (^=, cv::bitwise_xor(a,b,a), Mat, Mat) -CV_MAT_AUG_OPERATOR (^=, cv::bitwise_xor(a,b,a), Mat, Scalar) -CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a,b,a), Mat_<_Tp>, Mat) -CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a,b,a), Mat_<_Tp>, Scalar) -CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a,b,a), Mat_<_Tp>, Mat_<_Tp>) - +CV_MAT_AUG_OPERATOR_TN(/=, cv::divide(a, Mat(b), (const Mat&)a), Mat) +CV_MAT_AUG_OPERATOR_TN(/=, cv::divide(a, Mat(b), (const Mat&)a), Mat_<_Tp>) + +CV_MAT_AUG_OPERATOR (&=, cv::bitwise_and(a, b, (const Mat&)a), Mat, Mat) +CV_MAT_AUG_OPERATOR (&=, cv::bitwise_and(a, b, (const Mat&)a), Mat, Scalar) +CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a, b, (const Mat&)a), Mat_<_Tp>, Mat) +CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a, b, (const Mat&)a), Mat_<_Tp>, Scalar) +CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a, b, (const Mat&)a), Mat_<_Tp>, Mat_<_Tp>) +CV_MAT_AUG_OPERATOR_TN(&=, cv::bitwise_and(a, Mat(b), (const Mat&)a), Mat) +CV_MAT_AUG_OPERATOR_TN(&=, cv::bitwise_and(a, Mat(b), (const Mat&)a), Mat_<_Tp>) + +CV_MAT_AUG_OPERATOR (|=, cv::bitwise_or(a, b, (const Mat&)a), Mat, Mat) +CV_MAT_AUG_OPERATOR (|=, cv::bitwise_or(a, b, (const Mat&)a), Mat, Scalar) +CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a, b, (const Mat&)a), Mat_<_Tp>, Mat) +CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a, b, (const Mat&)a), Mat_<_Tp>, Scalar) +CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a, b, (const Mat&)a), Mat_<_Tp>, Mat_<_Tp>) +CV_MAT_AUG_OPERATOR_TN(|=, cv::bitwise_or(a, Mat(b), (const Mat&)a), Mat) +CV_MAT_AUG_OPERATOR_TN(|=, cv::bitwise_or(a, Mat(b), (const Mat&)a), Mat_<_Tp>) + +CV_MAT_AUG_OPERATOR (^=, cv::bitwise_xor(a, b, (const Mat&)a), Mat, Mat) +CV_MAT_AUG_OPERATOR (^=, cv::bitwise_xor(a, b, (const Mat&)a), Mat, Scalar) +CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a, b, (const Mat&)a), Mat_<_Tp>, Mat) +CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a, b, (const Mat&)a), Mat_<_Tp>, Scalar) +CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a, b, (const Mat&)a), Mat_<_Tp>, Mat_<_Tp>) +CV_MAT_AUG_OPERATOR_TN(^=, cv::bitwise_xor(a, Mat(b), (const Mat&)a), Mat) +CV_MAT_AUG_OPERATOR_TN(^=, cv::bitwise_xor(a, Mat(b), (const Mat&)a), Mat_<_Tp>) + +#undef CV_MAT_AUG_OPERATOR_TN #undef CV_MAT_AUG_OPERATOR_T #undef CV_MAT_AUG_OPERATOR #undef CV_MAT_AUG_OPERATOR1 @@ -349,13 +394,15 @@ inline int RNG::uniform(int a, int b) { return a == b ? a : (int)(next( inline float RNG::uniform(float a, float b) { return ((float)*this)*(b - a) + a; } inline double RNG::uniform(double a, double b) { return ((double)*this)*(b - a) + a; } +inline bool RNG::operator ==(const RNG& other) const { return state == other.state; } + inline unsigned RNG::next() { state = (uint64)(unsigned)state* /*CV_RNG_COEFF*/ 4164903690U + (unsigned)(state >> 32); return (unsigned)state; } -//! returns the next unifomly-distributed random number of the specified type +//! returns the next uniformly-distributed random number of the specified type template static inline _Tp randu() { return (_Tp)theRNG(); @@ -363,12 +410,29 @@ template static inline _Tp randu() ///////////////////////////////// Formatted string generation ///////////////////////////////// -CV_EXPORTS String format( const char* fmt, ... ); +/** @brief Returns a text string formatted using the printf-like expression. + +The function acts like sprintf but forms and returns an STL string. It can be used to form an error +message in the Exception constructor. +@param fmt printf-compatible formatting specifiers. + +**Note**: +|Type|Specifier| +|-|-| +|`const char*`|`%s`| +|`char`|`%c`| +|`float` / `double`|`%f`,`%g`| +|`int`, `long`, `long long`|`%d`, `%ld`, ``%lld`| +|`unsigned`, `unsigned long`, `unsigned long long`|`%u`, `%lu`, `%llu`| +|`uint64` -> `uintmax_t`, `int64` -> `intmax_t`|`%ju`, `%jd`| +|`size_t`|`%zu`| + */ +CV_EXPORTS String format( const char* fmt, ... ) CV_FORMAT_PRINTF(1, 2); ///////////////////////////////// Formatted output of cv::Mat ///////////////////////////////// static inline -Ptr format(InputArray mtx, int fmt) +Ptr format(InputArray mtx, Formatter::FormatType fmt) { return Formatter::get(fmt)->format(mtx.getMat()); } diff --git a/IPL/include/opencv/opencv2/core/optim.hpp b/IPL/include/opencv/opencv2/core/optim.hpp index 23e2155..f61a2b9 100644 --- a/IPL/include/opencv/opencv2/core/optim.hpp +++ b/IPL/include/opencv/opencv2/core/optim.hpp @@ -39,8 +39,8 @@ // //M*/ -#ifndef __OPENCV_OPTIM_HPP__ -#define __OPENCV_OPTIM_HPP__ +#ifndef OPENCV_OPTIM_HPP +#define OPENCV_OPTIM_HPP #include "opencv2/core.hpp" @@ -73,7 +73,7 @@ class CV_EXPORTS MinProblemSolver : public Algorithm /** @brief Getter for the optimized function. The optimized function is represented by Function interface, which requires derivatives to - implement the sole method calc(double*) to evaluate the function. + implement the calc(double*) and getDim() methods to evaluate the function. @return Smart-pointer to an object that implements Function interface - it represents the function that is being optimized. It can be empty, if no function was given so far. @@ -115,7 +115,7 @@ class CV_EXPORTS MinProblemSolver : public Algorithm always sensible) will be used. @param x The initial point, that will become a centroid of an initial simplex. After the algorithm - will terminate, it will be setted to the point where the algorithm stops, the point of possible + will terminate, it will be set to the point where the algorithm stops, the point of possible minimum. @return The value of a function at the point found. */ @@ -165,7 +165,7 @@ class CV_EXPORTS DownhillSolver : public MinProblemSolver /** @brief Sets the initial step that will be used in downhill simplex algorithm. - Step, together with initial point (givin in DownhillSolver::minimize) are two `n`-dimensional + Step, together with initial point (given in DownhillSolver::minimize) are two `n`-dimensional vectors that are used to determine the shape of initial simplex. Roughly said, initial point determines the position of a simplex (it will become simplex's centroid), while step determines the spread (size in each dimension) of a simplex. To be more precise, if \f$s,x_0\in\mathbb{R}^n\f$ are @@ -219,10 +219,10 @@ converge to it. Another obvious restriction is that it should be possible to com a function at any point, thus it is preferable to have analytic expression for gradient and computational burden should be born by the user. -The latter responsibility is accompilished via the getGradient method of a +The latter responsibility is accomplished via the getGradient method of a MinProblemSolver::Function interface (which represents function being optimized). This method takes point a point in *n*-dimensional space (first argument represents the array of coordinates of that -point) and comput its gradient (it should be stored in the second argument as an array). +point) and compute its gradient (it should be stored in the second argument as an array). @note class ConjGradSolver thus does not add any new methods to the basic MinProblemSolver interface. @@ -288,12 +288,12 @@ Bland's rule is used to prevent cy contain 32- or 64-bit floating point numbers. As a convenience, column-vector may be also submitted, in the latter case it is understood to correspond to \f$c^T\f$. @param Constr `m`-by-`n+1` matrix, whose rightmost column corresponds to \f$b\f$ in formulation above -and the remaining to \f$A\f$. It should containt 32- or 64-bit floating point numbers. +and the remaining to \f$A\f$. It should contain 32- or 64-bit floating point numbers. @param z The solution will be returned here as a column-vector - it corresponds to \f$c\f$ in the formulation above. It will contain 64-bit floating point numbers. @return One of cv::SolveLPResult */ -CV_EXPORTS_W int solveLP(const Mat& Func, const Mat& Constr, Mat& z); +CV_EXPORTS_W int solveLP(InputArray Func, InputArray Constr, OutputArray z); //! @} diff --git a/IPL/include/opencv/opencv2/core/ovx.hpp b/IPL/include/opencv/opencv2/core/ovx.hpp new file mode 100644 index 0000000..8bb7d54 --- /dev/null +++ b/IPL/include/opencv/opencv2/core/ovx.hpp @@ -0,0 +1,28 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2016, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. + +// OpenVX related definitions and declarations + +#pragma once +#ifndef OPENCV_OVX_HPP +#define OPENCV_OVX_HPP + +#include "cvdef.h" + +namespace cv +{ +/// Check if use of OpenVX is possible +CV_EXPORTS_W bool haveOpenVX(); + +/// Check if use of OpenVX is enabled +CV_EXPORTS_W bool useOpenVX(); + +/// Enable/disable use of OpenVX +CV_EXPORTS_W void setUseOpenVX(bool flag); +} // namespace cv + +#endif // OPENCV_OVX_HPP diff --git a/IPL/include/opencv/opencv2/core/persistence.hpp b/IPL/include/opencv/opencv2/core/persistence.hpp index 17686dd..0ddd3c0 100644 --- a/IPL/include/opencv/opencv2/core/persistence.hpp +++ b/IPL/include/opencv/opencv2/core/persistence.hpp @@ -41,8 +41,13 @@ // //M*/ -#ifndef __OPENCV_CORE_PERSISTENCE_HPP__ -#define __OPENCV_CORE_PERSISTENCE_HPP__ +#ifndef OPENCV_CORE_PERSISTENCE_HPP +#define OPENCV_CORE_PERSISTENCE_HPP + +#ifndef CV_DOXYGEN +/// Define to support persistence legacy formats +#define CV__LEGACY_PERSISTENCE +#endif #ifndef __cplusplus # error persistence.hpp header must be compiled as C++ @@ -57,8 +62,9 @@ Several functions that are described below take CvFileStorage\* as inputs and al save or to load hierarchical collections that consist of scalar values, standard CXCore objects (such as matrices, sequences, graphs), and user-defined objects. -OpenCV can read and write data in XML () or YAML () -formats. Below is an example of 3x3 floating-point identity matrix A, stored in XML and YAML files +OpenCV can read and write data in XML (), YAML () or +JSON () formats. Below is an example of 3x3 floating-point identity matrix A, +stored in XML and YAML files using CXCore functions: XML: @code{.xml} @@ -85,10 +91,9 @@ As it can be seen from the examples, XML uses nested tags to represent hierarchy indentation for that purpose (similar to the Python programming language). The same functions can read and write data in both formats; the particular format is determined by -the extension of the opened file, ".xml" for XML files and ".yml" or ".yaml" for YAML. +the extension of the opened file, ".xml" for XML files, ".yml" or ".yaml" for YAML and ".json" for +JSON. */ -typedef struct CvFileStorage CvFileStorage; -typedef struct CvFileNode CvFileNode; //! @} core_c @@ -99,27 +104,27 @@ namespace cv { /** @addtogroup core_xml -XML/YAML file storages. {#xml_storage} +XML/YAML/JSON file storages. {#xml_storage} ======================= Writing to a file storage. -------------------------- -You can store and then restore various OpenCV data structures to/from XML () -or YAML () formats. Also, it is possible store and load arbitrarily complex -data structures, which include OpenCV data structures, as well as primitive data types (integer and -floating-point numbers and text strings) as their elements. +You can store and then restore various OpenCV data structures to/from XML (), +YAML () or JSON () formats. Also, it is possible to store +and load arbitrarily complex data structures, which include OpenCV data structures, as well as +primitive data types (integer and floating-point numbers and text strings) as their elements. -Use the following procedure to write something to XML or YAML: +Use the following procedure to write something to XML, YAML or JSON: -# Create new FileStorage and open it for writing. It can be done with a single call to FileStorage::FileStorage constructor that takes a filename, or you can use the default constructor -and then call FileStorage::open. Format of the file (XML or YAML) is determined from the filename -extension (".xml" and ".yml"/".yaml", respectively) +and then call FileStorage::open. Format of the file (XML, YAML or JSON) is determined from the filename +extension (".xml", ".yml"/".yaml" and ".json", respectively) -# Write all the data you want using the streaming operator `<<`, just like in the case of STL streams. -# Close the file using FileStorage::release. FileStorage destructor also closes the file. Here is an example: @code - #include "opencv2/opencv.hpp" + #include "opencv2/core.hpp" #include using namespace cv; @@ -151,7 +156,7 @@ Here is an example: return 0; } @endcode -The sample above stores to XML and integer, text string (calibration date), 2 matrices, and a custom +The sample above stores to YML an integer, a text string (calibration date), 2 matrices, and a custom structure "feature", which includes feature coordinates and LBP (local binary pattern) value. Here is output of the sample: @code{.yaml} @@ -175,19 +180,19 @@ distCoeffs: !!opencv-matrix - { x:344, y:158, lbp:[ 1, 1, 0, 0, 0, 0, 1, 0 ] } @endcode -As an exercise, you can replace ".yml" with ".xml" in the sample above and see, how the +As an exercise, you can replace ".yml" with ".xml" or ".json" in the sample above and see, how the corresponding XML file will look like. Several things can be noted by looking at the sample code and the output: -- The produced YAML (and XML) consists of heterogeneous collections that can be nested. There are 2 - types of collections: named collections (mappings) and unnamed collections (sequences). In mappings +- The produced YAML (and XML/JSON) consists of heterogeneous collections that can be nested. There are + 2 types of collections: named collections (mappings) and unnamed collections (sequences). In mappings each element has a name and is accessed by name. This is similar to structures and std::map in C/C++ and dictionaries in Python. In sequences elements do not have names, they are accessed by indices. This is similar to arrays and std::vector in C/C++ and lists, tuples in Python. "Heterogeneous" means that elements of each single collection can have different types. - Top-level collection in YAML/XML is a mapping. Each matrix is stored as a mapping, and the matrix + Top-level collection in YAML/XML/JSON is a mapping. Each matrix is stored as a mapping, and the matrix elements are stored as a sequence. Then, there is a sequence of features, where each feature is represented a mapping, and lbp value in a nested sequence. @@ -203,7 +208,7 @@ Several things can be noted by looking at the sample code and the output: - To write a sequence, you first write the special string `[`, then write the elements, then write the closing `]`. -- In YAML (but not XML), mappings and sequences can be written in a compact Python-like inline +- In YAML/JSON (but not XML), mappings and sequences can be written in a compact Python-like inline form. In the sample above matrix elements, as well as each feature, including its lbp value, is stored in such inline form. To store a mapping/sequence in a compact form, put `:` after the opening character, e.g. use `{:` instead of `{` and `[:` instead of `[`. When the @@ -211,7 +216,7 @@ Several things can be noted by looking at the sample code and the output: Reading data from a file storage. --------------------------------- -To read the previously written XML or YAML file, do the following: +To read the previously written XML, YAML or JSON file, do the following: -# Open the file storage using FileStorage::FileStorage constructor or FileStorage::open method. In the current implementation the whole file is parsed and the whole representation of file storage is built in memory as a hierarchy of file nodes (see FileNode) @@ -278,12 +283,12 @@ element is a structure of 2 integers, followed by a single-precision floating-po equivalent notations of the above specification are `iif`, `2i1f` and so forth. Other examples: `u` means that the array consists of bytes, and `2d` means the array consists of pairs of doubles. -@see @ref filestorage.cpp +@see @ref samples/cpp/filestorage.cpp */ //! @{ -/** @example filestorage.cpp +/** @example samples/cpp/filestorage.cpp A complete example using the FileStorage interface */ @@ -292,8 +297,8 @@ A complete example using the FileStorage interface class CV_EXPORTS FileNode; class CV_EXPORTS FileNodeIterator; -/** @brief XML/YAML file storage class that encapsulates all the information necessary for writing or reading -data to/from a file. +/** @brief XML/YAML/JSON file storage class that encapsulates all the information necessary for writing or +reading data to/from a file. */ class CV_EXPORTS_W FileStorage { @@ -305,13 +310,17 @@ class CV_EXPORTS_W FileStorage WRITE = 1, //!< value, open the file for writing APPEND = 2, //!< value, open the file for appending MEMORY = 4, //!< flag, read data from source or write data to the internal buffer (which is - //!< returned by FileStorage::release) + //!< returned by FileStorage::release) FORMAT_MASK = (7<<3), //!< mask for format flags FORMAT_AUTO = 0, //!< flag, auto format FORMAT_XML = (1<<3), //!< flag, XML format - FORMAT_YAML = (2<<3) //!< flag, YAML format + FORMAT_YAML = (2<<3), //!< flag, YAML format + FORMAT_JSON = (3<<3), //!< flag, JSON format + + BASE64 = 64, //!< flag, write rawdata in Base64 by default. (consider using WRITE_BASE64) + WRITE_BASE64 = BASE64 | WRITE, //!< flag, enable both WRITE and BASE64 }; - enum + enum State { UNDEFINED = 0, VALUE_EXPECTED = 1, @@ -321,129 +330,138 @@ class CV_EXPORTS_W FileStorage /** @brief The constructors. - The full constructor opens the file. Alternatively you can use the default constructor and then - call FileStorage::open. + The full constructor opens the file. Alternatively you can use the default constructor and then + call FileStorage::open. */ CV_WRAP FileStorage(); /** @overload - @param source Name of the file to open or the text string to read the data from. Extension of the - file (.xml or .yml/.yaml) determines its format (XML or YAML respectively). Also you can append .gz - to work with compressed files, for example myHugeMatrix.xml.gz. If both FileStorage::WRITE and - FileStorage::MEMORY flags are specified, source is used just to specify the output file format (e.g. - mydata.xml, .yml etc.). - @param flags Mode of operation. See FileStorage::Mode - @param encoding Encoding of the file. Note that UTF-16 XML encoding is not supported currently and - you should use 8-bit encoding instead of it. - */ - CV_WRAP FileStorage(const String& source, int flags, const String& encoding=String()); - - /** @overload */ - FileStorage(CvFileStorage* fs, bool owning=true); + @copydoc open() + */ + CV_WRAP FileStorage(const String& filename, int flags, const String& encoding=String()); //! the destructor. calls release() virtual ~FileStorage(); /** @brief Opens a file. - See description of parameters in FileStorage::FileStorage. The method calls FileStorage::release - before opening the file. - @param filename Name of the file to open or the text string to read the data from. - Extension of the file (.xml or .yml/.yaml) determines its format (XML or YAML respectively). - Also you can append .gz to work with compressed files, for example myHugeMatrix.xml.gz. If both - FileStorage::WRITE and FileStorage::MEMORY flags are specified, source is used just to specify - the output file format (e.g. mydata.xml, .yml etc.). - @param flags Mode of operation. One of FileStorage::Mode - @param encoding Encoding of the file. Note that UTF-16 XML encoding is not supported currently and - you should use 8-bit encoding instead of it. + See description of parameters in FileStorage::FileStorage. The method calls FileStorage::release + before opening the file. + @param filename Name of the file to open or the text string to read the data from. + Extension of the file (.xml, .yml/.yaml or .json) determines its format (XML, YAML or JSON + respectively). Also you can append .gz to work with compressed files, for example myHugeMatrix.xml.gz. If both + FileStorage::WRITE and FileStorage::MEMORY flags are specified, source is used just to specify + the output file format (e.g. mydata.xml, .yml etc.). A file name can also contain parameters. + You can use this format, "*?base64" (e.g. "file.json?base64" (case sensitive)), as an alternative to + FileStorage::BASE64 flag. + @param flags Mode of operation. One of FileStorage::Mode + @param encoding Encoding of the file. Note that UTF-16 XML encoding is not supported currently and + you should use 8-bit encoding instead of it. */ CV_WRAP virtual bool open(const String& filename, int flags, const String& encoding=String()); /** @brief Checks whether the file is opened. - @returns true if the object is associated with the current file and false otherwise. It is a - good practice to call this method after you tried to open a file. + @returns true if the object is associated with the current file and false otherwise. It is a + good practice to call this method after you tried to open a file. */ CV_WRAP virtual bool isOpened() const; /** @brief Closes the file and releases all the memory buffers. - Call this method after all I/O operations with the storage are finished. + Call this method after all I/O operations with the storage are finished. */ CV_WRAP virtual void release(); /** @brief Closes the file and releases all the memory buffers. - Call this method after all I/O operations with the storage are finished. If the storage was - opened for writing data and FileStorage::WRITE was specified + Call this method after all I/O operations with the storage are finished. If the storage was + opened for writing data and FileStorage::WRITE was specified */ CV_WRAP virtual String releaseAndGetString(); /** @brief Returns the first element of the top-level mapping. - @returns The first element of the top-level mapping. + @returns The first element of the top-level mapping. */ CV_WRAP FileNode getFirstTopLevelNode() const; /** @brief Returns the top-level mapping - @param streamidx Zero-based index of the stream. In most cases there is only one stream in the file. - However, YAML supports multiple streams and so there can be several. - @returns The top-level mapping. + @param streamidx Zero-based index of the stream. In most cases there is only one stream in the file. + However, YAML supports multiple streams and so there can be several. + @returns The top-level mapping. */ CV_WRAP FileNode root(int streamidx=0) const; /** @brief Returns the specified element of the top-level mapping. - @param nodename Name of the file node. - @returns Node with the given name. + @param nodename Name of the file node. + @returns Node with the given name. */ FileNode operator[](const String& nodename) const; /** @overload */ - CV_WRAP FileNode operator[](const char* nodename) const; + CV_WRAP_AS(getNode) FileNode operator[](const char* nodename) const; - /** @brief Returns the obsolete C FileStorage structure. - @returns Pointer to the underlying C FileStorage structure + /** + * @brief Simplified writing API to use with bindings. + * @param name Name of the written object + * @param val Value of the written object */ - CvFileStorage* operator *() { return fs.get(); } - - /** @overload */ - const CvFileStorage* operator *() const { return fs.get(); } + CV_WRAP void write(const String& name, int val); + /// @overload + CV_WRAP void write(const String& name, double val); + /// @overload + CV_WRAP void write(const String& name, const String& val); + /// @overload + CV_WRAP void write(const String& name, const Mat& val); + /// @overload + CV_WRAP void write(const String& name, const std::vector& val); /** @brief Writes multiple numbers. - Writes one or more numbers of the specified format to the currently written structure. Usually it is - more convenient to use operator `<<` instead of this method. - @param fmt Specification of each array element, see @ref format_spec "format specification" - @param vec Pointer to the written array. - @param len Number of the uchar elements to write. + Writes one or more numbers of the specified format to the currently written structure. Usually it is + more convenient to use operator `<<` instead of this method. + @param fmt Specification of each array element, see @ref format_spec "format specification" + @param vec Pointer to the written array. + @param len Number of the uchar elements to write. */ - void writeRaw( const String& fmt, const uchar* vec, size_t len ); + void writeRaw( const String& fmt, const void* vec, size_t len ); + + /** @brief Writes a comment. - /** @brief Writes the registered C structure (CvMat, CvMatND, CvSeq). - @param name Name of the written object. - @param obj Pointer to the object. - @see ocvWrite for details. + The function writes a comment into file storage. The comments are skipped when the storage is read. + @param comment The written comment, single-line or multi-line + @param append If true, the function tries to put the comment at the end of current line. + Else if the comment is multi-line, or if it does not fit at the end of the current + line, the comment starts a new line. */ - void writeObj( const String& name, const void* obj ); + CV_WRAP void writeComment(const String& comment, bool append = false); + + void startWriteStruct(const String& name, int flags, const String& typeName); + void endWriteStruct(); /** @brief Returns the normalized object name for the specified name of a file. - @param filename Name of a file - @returns The normalized object name. + @param filename Name of a file + @returns The normalized object name. */ static String getDefaultObjectName(const String& filename); - Ptr fs; //!< the underlying C FileStorage structure - String elname; //!< the currently written element - std::vector structs; //!< the stack of written structures - int state; //!< the writer state -}; + /** @brief Returns the current format. + * @returns The current format, see FileStorage::Mode + */ + CV_WRAP int getFormat() const; + + int state; + std::string elname; -template<> CV_EXPORTS void DefaultDeleter::operator ()(CvFileStorage* obj) const; + class Impl; + Ptr p; +}; /** @brief File Storage Node class. The node is used to store each and every element of the file storage opened for reading. When XML/YAML file is read, it is first parsed and stored in the memory as a hierarchical collection of -nodes. Each node can be a “leaf” that is contain a single number or a string, or be a collection of +nodes. Each node can be a "leaf" that is contain a single number or a string, or be a collection of other nodes. There can be named collections (mappings) where each element has a name and it is accessed by a name, and ordered collections (sequences) where elements do not have names but rather accessed by index. Type of the file node can be determined using FileNode::type method. @@ -455,7 +473,7 @@ class CV_EXPORTS_W_SIMPLE FileNode { public: //! type of the file storage node - enum Type + enum { NONE = 0, //!< empty node INT = 1, //!< an integer @@ -463,51 +481,60 @@ class CV_EXPORTS_W_SIMPLE FileNode FLOAT = REAL, //!< synonym or REAL STR = 3, //!< text string in UTF-8 encoding STRING = STR, //!< synonym for STR - REF = 4, //!< integer of size size_t. Typically used for storing complex dynamic structures where some elements reference the others - SEQ = 5, //!< sequence - MAP = 6, //!< mapping + SEQ = 4, //!< sequence + MAP = 5, //!< mapping TYPE_MASK = 7, + FLOW = 8, //!< compact representation of a sequence or mapping. Used only by YAML writer - USER = 16, //!< a registered object (e.g. a matrix) - EMPTY = 32, //!< empty structure (sequence or mapping) - NAMED = 64 //!< the node has a name (i.e. it is element of a mapping) + UNIFORM = 8, //!< if set, means that all the collection elements are numbers of the same type (real's or int's). + //!< UNIFORM is used only when reading FileStorage; FLOW is used only when writing. So they share the same bit + EMPTY = 16, //!< empty structure (sequence or mapping) + NAMED = 32 //!< the node has a name (i.e. it is element of a mapping). }; /** @brief The constructors. - These constructors are used to create a default file node, construct it from obsolete structures or - from the another file node. + These constructors are used to create a default file node, construct it from obsolete structures or + from the another file node. */ CV_WRAP FileNode(); /** @overload - @param fs Pointer to the obsolete file storage structure. - @param node File node to be used as initialization for the created file node. - */ - FileNode(const CvFileStorage* fs, const CvFileNode* node); + @param fs Pointer to the file storage structure. + @param blockIdx Index of the memory block where the file node is stored + @param ofs Offset in bytes from the beginning of the serialized storage + */ + FileNode(const FileStorage* fs, size_t blockIdx, size_t ofs); /** @overload - @param node File node to be used as initialization for the created file node. - */ + @param node File node to be used as initialization for the created file node. + */ FileNode(const FileNode& node); + FileNode& operator=(const FileNode& node); + /** @brief Returns element of a mapping node or a sequence node. - @param nodename Name of an element in the mapping node. - @returns Returns the element with the given identifier. + @param nodename Name of an element in the mapping node. + @returns Returns the element with the given identifier. */ FileNode operator[](const String& nodename) const; /** @overload - @param nodename Name of an element in the mapping node. - */ - CV_WRAP FileNode operator[](const char* nodename) const; + @param nodename Name of an element in the mapping node. + */ + CV_WRAP_AS(getNode) FileNode operator[](const char* nodename) const; /** @overload - @param i Index of an element in the sequence node. - */ - CV_WRAP FileNode operator[](int i) const; + @param i Index of an element in the sequence node. + */ + CV_WRAP_AS(at) FileNode operator[](int i) const; + + /** @brief Returns keys of a mapping node. + @returns Keys of a mapping node. + */ + CV_WRAP std::vector keys() const; /** @brief Returns type of the node. - @returns Type of the node. See FileNode::Type + @returns Type of the node. See FileNode::Type */ CV_WRAP int type() const; @@ -528,9 +555,11 @@ class CV_EXPORTS_W_SIMPLE FileNode //! returns true if the node has a name CV_WRAP bool isNamed() const; //! returns the node name or an empty string if the node is nameless - CV_WRAP String name() const; + CV_WRAP std::string name() const; //! returns the number of elements in the node, if it is a sequence or mapping, or 1 otherwise. CV_WRAP size_t size() const; + //! returns raw size of the FileNode in bytes + CV_WRAP size_t rawSize() const; //! returns the node content as an integer. If the node stores floating-point number, it is rounded. operator int() const; //! returns the node content as float @@ -538,15 +567,16 @@ class CV_EXPORTS_W_SIMPLE FileNode //! returns the node content as double operator double() const; //! returns the node content as text string - operator String() const; -#ifndef OPENCV_NOSTL - operator std::string() const; -#endif + inline operator std::string() const { return this->string(); } - //! returns pointer to the underlying file node - CvFileNode* operator *(); - //! returns pointer to the underlying file node - const CvFileNode* operator* () const; + static bool isMap(int flags); + static bool isSeq(int flags); + static bool isCollection(int flags); + static bool isEmptyCollection(int flags); + static bool isFlow(int flags); + + uchar* ptr(); + const uchar* ptr() const; //! returns iterator pointing to the first node element FileNodeIterator begin() const; @@ -558,92 +588,95 @@ class CV_EXPORTS_W_SIMPLE FileNode Usually it is more convenient to use operator `>>` instead of this method. @param fmt Specification of each array element. See @ref format_spec "format specification" @param vec Pointer to the destination array. - @param len Number of elements to read. If it is greater than number of remaining elements then all - of them will be read. + @param len Number of bytes to read (buffer size limit). If it is greater than number of + remaining elements then all of them will be read. */ - void readRaw( const String& fmt, uchar* vec, size_t len ) const; - - //! reads the registered object and returns pointer to it - void* readObj() const; + void readRaw( const String& fmt, void* vec, size_t len ) const; - // do not use wrapper pointer classes for better efficiency - const CvFileStorage* fs; - const CvFileNode* node; + /** Internal method used when reading FileStorage. + Sets the type (int, real or string) and value of the previously created node. + */ + void setValue( int type, const void* value, int len=-1 ); + + //! Simplified reading API to use with bindings. + CV_WRAP double real() const; + //! Simplified reading API to use with bindings. + CV_WRAP std::string string() const; + //! Simplified reading API to use with bindings. + CV_WRAP Mat mat() const; + + //protected: + const FileStorage* fs; + size_t blockIdx; + size_t ofs; }; /** @brief used to iterate through sequences and mappings. -A standard STL notation, with node.begin(), node.end() denoting the beginning and the end of a -sequence, stored in node. See the data reading sample in the beginning of the section. + A standard STL notation, with node.begin(), node.end() denoting the beginning and the end of a + sequence, stored in node. See the data reading sample in the beginning of the section. */ class CV_EXPORTS FileNodeIterator { public: /** @brief The constructors. - These constructors are used to create a default iterator, set it to specific element in a file node - or construct it from another iterator. + These constructors are used to create a default iterator, set it to specific element in a file node + or construct it from another iterator. */ FileNodeIterator(); /** @overload - @param fs File storage for the iterator. - @param node File node for the iterator. - @param ofs Index of the element in the node. The created iterator will point to this element. - */ - FileNodeIterator(const CvFileStorage* fs, const CvFileNode* node, size_t ofs=0); + @param node File node - the collection to iterate over; + it can be a scalar (equivalent to 1-element collection) or "none" (equivalent to empty collection). + @param seekEnd - true if iterator needs to be set after the last element of the node; + that is: + * node.begin() => FileNodeIterator(node, false) + * node.end() => FileNodeIterator(node, true) + */ + FileNodeIterator(const FileNode& node, bool seekEnd); /** @overload - @param it Iterator to be used as initialization for the created iterator. - */ + @param it Iterator to be used as initialization for the created iterator. + */ FileNodeIterator(const FileNodeIterator& it); + FileNodeIterator& operator=(const FileNodeIterator& it); + //! returns the currently observed element FileNode operator *() const; - //! accesses the currently observed element methods - FileNode operator ->() const; //! moves iterator to the next node FileNodeIterator& operator ++ (); //! moves iterator to the next node FileNodeIterator operator ++ (int); - //! moves iterator to the previous node - FileNodeIterator& operator -- (); - //! moves iterator to the previous node - FileNodeIterator operator -- (int); //! moves iterator forward by the specified offset (possibly negative) FileNodeIterator& operator += (int ofs); - //! moves iterator backward by the specified offset (possibly negative) - FileNodeIterator& operator -= (int ofs); /** @brief Reads node elements to the buffer with the specified format. Usually it is more convenient to use operator `>>` instead of this method. @param fmt Specification of each array element. See @ref format_spec "format specification" @param vec Pointer to the destination array. - @param maxCount Number of elements to read. If it is greater than number of remaining elements then - all of them will be read. + @param len Number of bytes to read (buffer size limit). If it is greater than number of + remaining elements then all of them will be read. */ - FileNodeIterator& readRaw( const String& fmt, uchar* vec, - size_t maxCount=(size_t)INT_MAX ); + FileNodeIterator& readRaw( const String& fmt, void* vec, + size_t len=(size_t)INT_MAX ); - struct SeqReader - { - int header_size; - void* seq; /* sequence, beign read; CvSeq */ - void* block; /* current block; CvSeqBlock */ - schar* ptr; /* pointer to element be read next */ - schar* block_min; /* pointer to the beginning of block */ - schar* block_max; /* pointer to the end of block */ - int delta_index;/* = seq->first->start_index */ - schar* prev_elem; /* pointer to previous element */ - }; + //! returns the number of remaining (not read yet) elements + size_t remaining() const; + + bool equalTo(const FileNodeIterator& it) const; - const CvFileStorage* fs; - const CvFileNode* container; - SeqReader reader; - size_t remaining; +protected: + const FileStorage* fs; + size_t blockIdx; + size_t ofs; + size_t blockSize; + size_t nodeNElems; + size_t idx; }; //! @} core_xml @@ -659,8 +692,10 @@ CV_EXPORTS void write( FileStorage& fs, const String& name, double value ); CV_EXPORTS void write( FileStorage& fs, const String& name, const String& value ); CV_EXPORTS void write( FileStorage& fs, const String& name, const Mat& value ); CV_EXPORTS void write( FileStorage& fs, const String& name, const SparseMat& value ); +#ifdef CV__LEGACY_PERSISTENCE CV_EXPORTS void write( FileStorage& fs, const String& name, const std::vector& value); CV_EXPORTS void write( FileStorage& fs, const String& name, const std::vector& value); +#endif CV_EXPORTS void writeScalar( FileStorage& fs, int value ); CV_EXPORTS void writeScalar( FileStorage& fs, float value ); @@ -675,11 +710,15 @@ CV_EXPORTS void writeScalar( FileStorage& fs, const String& value ); CV_EXPORTS void read(const FileNode& node, int& value, int default_value); CV_EXPORTS void read(const FileNode& node, float& value, float default_value); CV_EXPORTS void read(const FileNode& node, double& value, double default_value); -CV_EXPORTS void read(const FileNode& node, String& value, const String& default_value); +CV_EXPORTS void read(const FileNode& node, std::string& value, const std::string& default_value); CV_EXPORTS void read(const FileNode& node, Mat& mat, const Mat& default_mat = Mat() ); CV_EXPORTS void read(const FileNode& node, SparseMat& mat, const SparseMat& default_mat = SparseMat() ); +#ifdef CV__LEGACY_PERSISTENCE CV_EXPORTS void read(const FileNode& node, std::vector& keypoints); CV_EXPORTS void read(const FileNode& node, std::vector& matches); +#endif +CV_EXPORTS void read(const FileNode& node, KeyPoint& value, const KeyPoint& default_value); +CV_EXPORTS void read(const FileNode& node, DMatch& value, const DMatch& default_value); template static inline void read(const FileNode& node, Point_<_Tp>& value, const Point_<_Tp>& default_value) { @@ -719,6 +758,17 @@ template static inline void read(const FileNode& node, Vec value = temp.size() != cn ? default_value : Vec<_Tp, cn>(&temp[0]); } +template static inline void read(const FileNode& node, Matx<_Tp, m, n>& value, const Matx<_Tp, m, n>& default_matx = Matx<_Tp, m, n>()) +{ + Mat temp; + read(node, temp); // read as a Mat class + + if (temp.empty()) + value = default_matx; + else + value = Matx<_Tp, m, n>(temp); +} + template static inline void read(const FileNode& node, Scalar_<_Tp>& value, const Scalar_<_Tp>& default_value) { std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp; @@ -773,7 +823,7 @@ namespace internal VecWriterProxy( FileStorage* _fs ) : fs(_fs) {} void operator()(const std::vector<_Tp>& vec) const { - int _fmt = DataType<_Tp>::fmt; + int _fmt = traits::SafeFmt<_Tp>::fmt; char fmt[] = { (char)((_fmt >> 8) + '1'), (char)_fmt, '\0' }; fs->writeRaw(fmt, !vec.empty() ? (uchar*)&vec[0] : 0, vec.size() * sizeof(_Tp)); } @@ -787,7 +837,7 @@ namespace internal VecReaderProxy( FileNodeIterator* _it ) : it(_it) {} void operator()(std::vector<_Tp>& vec, size_t count) const { - count = std::min(count, it->remaining); + count = std::min(count, it->remaining()); vec.resize(count); for (size_t i = 0; i < count; i++, ++(*it)) read(**it, vec[i], _Tp()); @@ -802,12 +852,14 @@ namespace internal VecReaderProxy( FileNodeIterator* _it ) : it(_it) {} void operator()(std::vector<_Tp>& vec, size_t count) const { - size_t remaining = it->remaining; + size_t remaining = it->remaining(); size_t cn = DataType<_Tp>::channels; - int _fmt = DataType<_Tp>::fmt; + int _fmt = traits::SafeFmt<_Tp>::fmt; + CV_Assert((_fmt >> 8) < 9); char fmt[] = { (char)((_fmt >> 8)+'1'), (char)_fmt, '\0' }; + CV_Assert((remaining % cn) == 0); size_t remaining1 = remaining / cn; - count = count < remaining1 ? count : remaining1; + count = count > remaining1 ? remaining1 : count; vec.resize(count); it->readRaw(fmt, !vec.empty() ? (uchar*)&vec[0] : 0, count*sizeof(_Tp)); } @@ -897,6 +949,12 @@ void write(FileStorage& fs, const Vec<_Tp, cn>& v ) write(fs, v.val[i]); } +template static inline +void write(FileStorage& fs, const Matx<_Tp, m, n>& x ) +{ + write(fs, Mat(x)); // write as a Mat class +} + template static inline void write(FileStorage& fs, const Scalar_<_Tp>& s ) { @@ -916,11 +974,10 @@ void write(FileStorage& fs, const Range& r ) template static inline void write( FileStorage& fs, const std::vector<_Tp>& vec ) { - cv::internal::VecWriterProxy<_Tp, DataType<_Tp>::fmt != 0> w(&fs); + cv::internal::VecWriterProxy<_Tp, traits::SafeFmt<_Tp>::fmt != 0> w(&fs); w(vec); } - template static inline void write(FileStorage& fs, const String& name, const Point_<_Tp>& pt ) { @@ -963,6 +1020,12 @@ void write(FileStorage& fs, const String& name, const Vec<_Tp, cn>& v ) write(fs, v); } +template static inline +void write(FileStorage& fs, const String& name, const Matx<_Tp, m, n>& x ) +{ + write(fs, name, Mat(x)); // write as a Mat class +} + template static inline void write(FileStorage& fs, const String& name, const Scalar_<_Tp>& s ) { @@ -977,13 +1040,71 @@ void write(FileStorage& fs, const String& name, const Range& r ) write(fs, r); } +static inline +void write(FileStorage& fs, const String& name, const KeyPoint& kpt) +{ + cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW); + write(fs, kpt.pt.x); + write(fs, kpt.pt.y); + write(fs, kpt.size); + write(fs, kpt.angle); + write(fs, kpt.response); + write(fs, kpt.octave); + write(fs, kpt.class_id); +} + +static inline +void write(FileStorage& fs, const String& name, const DMatch& m) +{ + cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW); + write(fs, m.queryIdx); + write(fs, m.trainIdx); + write(fs, m.imgIdx); + write(fs, m.distance); +} + +template::value >::type* = nullptr> +static inline void write( FileStorage& fs, const String& name, const _Tp& val ) +{ + write(fs, name, static_cast(val)); +} + template static inline void write( FileStorage& fs, const String& name, const std::vector<_Tp>& vec ) { - cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+(DataType<_Tp>::fmt != 0 ? FileNode::FLOW : 0)); + cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+(traits::SafeFmt<_Tp>::fmt != 0 ? FileNode::FLOW : 0)); write(fs, vec); } +template static inline +void write( FileStorage& fs, const String& name, const std::vector< std::vector<_Tp> >& vec ) +{ + cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ); + for(size_t i = 0; i < vec.size(); i++) + { + cv::internal::WriteStructContext ws_(fs, name, FileNode::SEQ+(traits::SafeFmt<_Tp>::fmt != 0 ? FileNode::FLOW : 0)); + write(fs, vec[i]); + } +} + +#ifdef CV__LEGACY_PERSISTENCE +// This code is not needed anymore, but it is preserved here to keep source compatibility +// Implementation is similar to templates instantiations +static inline void write(FileStorage& fs, const KeyPoint& kpt) { write(fs, String(), kpt); } +static inline void write(FileStorage& fs, const DMatch& m) { write(fs, String(), m); } +static inline void write(FileStorage& fs, const std::vector& vec) +{ + cv::internal::VecWriterProxy w(&fs); + w(vec); +} +static inline void write(FileStorage& fs, const std::vector& vec) +{ + cv::internal::VecWriterProxy w(&fs); + w(vec); + +} +#endif + //! @} FileStorage //! @relates cv::FileNode @@ -1032,14 +1153,22 @@ void read(const FileNode& node, short& value, short default_value) template static inline void read( FileNodeIterator& it, std::vector<_Tp>& vec, size_t maxCount = (size_t)INT_MAX ) { - cv::internal::VecReaderProxy<_Tp, DataType<_Tp>::fmt != 0> r(&it); + cv::internal::VecReaderProxy<_Tp, traits::SafeFmt<_Tp>::fmt != 0> r(&it); r(vec, maxCount); } +template::value >::type* = nullptr> +static inline void read(const FileNode& node, _Tp& value, const _Tp& default_value = static_cast<_Tp>(0)) +{ + int temp; + read(node, temp, static_cast(default_value)); + value = static_cast<_Tp>(temp); +} + template static inline void read( const FileNode& node, std::vector<_Tp>& vec, const std::vector<_Tp>& default_value = std::vector<_Tp>() ) { - if(!node.node) + if(node.empty()) vec = default_value; else { @@ -1048,6 +1177,24 @@ void read( const FileNode& node, std::vector<_Tp>& vec, const std::vector<_Tp>& } } +static inline +void read( const FileNode& node, std::vector& vec, const std::vector& default_value ) +{ + if(node.empty()) + vec = default_value; + else + read(node, vec); +} + +static inline +void read( const FileNode& node, std::vector& vec, const std::vector& default_value ) +{ + if(node.empty()) + vec = default_value; + else + read(node, vec); +} + //! @} FileNode //! @relates cv::FileStorage @@ -1103,7 +1250,7 @@ FileNodeIterator& operator >> (FileNodeIterator& it, _Tp& value) template static inline FileNodeIterator& operator >> (FileNodeIterator& it, std::vector<_Tp>& vec) { - cv::internal::VecReaderProxy<_Tp, DataType<_Tp>::fmt != 0> r(&it); + cv::internal::VecReaderProxy<_Tp, traits::SafeFmt<_Tp>::fmt != 0> r(&it); r(vec, (size_t)INT_MAX); return it; } @@ -1130,66 +1277,61 @@ void operator >> (const FileNode& n, std::vector<_Tp>& vec) it >> vec; } -//! @} FileNode - -//! @relates cv::FileNodeIterator -//! @{ +/** @brief Reads KeyPoint from a file storage. +*/ +//It needs special handling because it contains two types of fields, int & float. +static inline +void operator >> (const FileNode& n, KeyPoint& kpt) +{ + FileNodeIterator it = n.begin(); + it >> kpt.pt.x >> kpt.pt.y >> kpt.size >> kpt.angle >> kpt.response >> kpt.octave >> kpt.class_id; +} +#ifdef CV__LEGACY_PERSISTENCE +static inline +void operator >> (const FileNode& n, std::vector& vec) +{ + read(n, vec); +} static inline -bool operator == (const FileNodeIterator& it1, const FileNodeIterator& it2) +void operator >> (const FileNode& n, std::vector& vec) { - return it1.fs == it2.fs && it1.container == it2.container && - it1.reader.ptr == it2.reader.ptr && it1.remaining == it2.remaining; + read(n, vec); } +#endif +/** @brief Reads DMatch from a file storage. +*/ +//It needs special handling because it contains two types of fields, int & float. static inline -bool operator != (const FileNodeIterator& it1, const FileNodeIterator& it2) +void operator >> (const FileNode& n, DMatch& m) { - return !(it1 == it2); + FileNodeIterator it = n.begin(); + it >> m.queryIdx >> m.trainIdx >> m.imgIdx >> m.distance; } +//! @} FileNode + +//! @relates cv::FileNodeIterator +//! @{ + +CV_EXPORTS bool operator == (const FileNodeIterator& it1, const FileNodeIterator& it2); +CV_EXPORTS bool operator != (const FileNodeIterator& it1, const FileNodeIterator& it2); + static inline ptrdiff_t operator - (const FileNodeIterator& it1, const FileNodeIterator& it2) { - return it2.remaining - it1.remaining; + return it2.remaining() - it1.remaining(); } static inline bool operator < (const FileNodeIterator& it1, const FileNodeIterator& it2) { - return it1.remaining > it2.remaining; + return it1.remaining() > it2.remaining(); } //! @} FileNodeIterator -//! @cond IGNORED - -inline FileNode FileStorage::getFirstTopLevelNode() const { FileNode r = root(); FileNodeIterator it = r.begin(); return it != r.end() ? *it : FileNode(); } -inline FileNode::FileNode() : fs(0), node(0) {} -inline FileNode::FileNode(const CvFileStorage* _fs, const CvFileNode* _node) : fs(_fs), node(_node) {} -inline FileNode::FileNode(const FileNode& _node) : fs(_node.fs), node(_node.node) {} -inline bool FileNode::empty() const { return node == 0; } -inline bool FileNode::isNone() const { return type() == NONE; } -inline bool FileNode::isSeq() const { return type() == SEQ; } -inline bool FileNode::isMap() const { return type() == MAP; } -inline bool FileNode::isInt() const { return type() == INT; } -inline bool FileNode::isReal() const { return type() == REAL; } -inline bool FileNode::isString() const { return type() == STR; } -inline CvFileNode* FileNode::operator *() { return (CvFileNode*)node; } -inline const CvFileNode* FileNode::operator* () const { return node; } -inline FileNode::operator int() const { int value; read(*this, value, 0); return value; } -inline FileNode::operator float() const { float value; read(*this, value, 0.f); return value; } -inline FileNode::operator double() const { double value; read(*this, value, 0.); return value; } -inline FileNode::operator String() const { String value; read(*this, value, value); return value; } -inline FileNodeIterator FileNode::begin() const { return FileNodeIterator(fs, node); } -inline FileNodeIterator FileNode::end() const { return FileNodeIterator(fs, node, size()); } -inline void FileNode::readRaw( const String& fmt, uchar* vec, size_t len ) const { begin().readRaw( fmt, vec, len ); } -inline FileNode FileNodeIterator::operator *() const { return FileNode(fs, (const CvFileNode*)(const void*)reader.ptr); } -inline FileNode FileNodeIterator::operator ->() const { return FileNode(fs, (const CvFileNode*)(const void*)reader.ptr); } -inline String::String(const FileNode& fn): cstr_(0), len_(0) { read(fn, *this, *this); } - -//! @endcond - } // cv -#endif // __OPENCV_CORE_PERSISTENCE_HPP__ +#endif // OPENCV_CORE_PERSISTENCE_HPP diff --git a/IPL/include/opencv/opencv2/core/private.cuda.hpp b/IPL/include/opencv/opencv2/core/private.cuda.hpp deleted file mode 100644 index d676ce8..0000000 --- a/IPL/include/opencv/opencv2/core/private.cuda.hpp +++ /dev/null @@ -1,172 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_CORE_PRIVATE_CUDA_HPP__ -#define __OPENCV_CORE_PRIVATE_CUDA_HPP__ - -#ifndef __OPENCV_BUILD -# error this is a private header which should not be used from outside of the OpenCV library -#endif - -#include "cvconfig.h" - -#include "opencv2/core/cvdef.h" -#include "opencv2/core/base.hpp" - -#include "opencv2/core/cuda.hpp" - -#ifdef HAVE_CUDA -# include -# include -# include -# include "opencv2/core/cuda_stream_accessor.hpp" -# include "opencv2/core/cuda/common.hpp" - -# define NPP_VERSION (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD) - -# define CUDART_MINIMUM_REQUIRED_VERSION 4020 - -# if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION) -# error "Insufficient Cuda Runtime library version, please update it." -# endif - -# if defined(CUDA_ARCH_BIN_OR_PTX_10) -# error "OpenCV CUDA module doesn't support NVIDIA compute capability 1.0" -# endif -#endif - -//! @cond IGNORED - -namespace cv { namespace cuda { - CV_EXPORTS cv::String getNppErrorMessage(int code); - CV_EXPORTS cv::String getCudaDriverApiErrorMessage(int code); - - CV_EXPORTS GpuMat getInputMat(InputArray _src, Stream& stream); - - CV_EXPORTS GpuMat getOutputMat(OutputArray _dst, int rows, int cols, int type, Stream& stream); - static inline GpuMat getOutputMat(OutputArray _dst, Size size, int type, Stream& stream) - { - return getOutputMat(_dst, size.height, size.width, type, stream); - } - - CV_EXPORTS void syncOutput(const GpuMat& dst, OutputArray _dst, Stream& stream); -}} - -#ifndef HAVE_CUDA - -static inline void throw_no_cuda() { CV_Error(cv::Error::GpuNotSupported, "The library is compiled without CUDA support"); } - -#else // HAVE_CUDA - -static inline void throw_no_cuda() { CV_Error(cv::Error::StsNotImplemented, "The called functionality is disabled for current build or platform"); } - -namespace cv { namespace cuda -{ - class CV_EXPORTS BufferPool - { - public: - explicit BufferPool(Stream& stream); - - GpuMat getBuffer(int rows, int cols, int type); - GpuMat getBuffer(Size size, int type) { return getBuffer(size.height, size.width, type); } - - GpuMat::Allocator* getAllocator() const { return allocator_; } - - private: - GpuMat::Allocator* allocator_; - }; - - static inline void checkNppError(int code, const char* file, const int line, const char* func) - { - if (code < 0) - cv::error(cv::Error::GpuApiCallError, getNppErrorMessage(code), func, file, line); - } - - static inline void checkCudaDriverApiError(int code, const char* file, const int line, const char* func) - { - if (code != CUDA_SUCCESS) - cv::error(cv::Error::GpuApiCallError, getCudaDriverApiErrorMessage(code), func, file, line); - } - - template struct NPPTypeTraits; - template<> struct NPPTypeTraits { typedef Npp8u npp_type; }; - template<> struct NPPTypeTraits { typedef Npp8s npp_type; }; - template<> struct NPPTypeTraits { typedef Npp16u npp_type; }; - template<> struct NPPTypeTraits { typedef Npp16s npp_type; }; - template<> struct NPPTypeTraits { typedef Npp32s npp_type; }; - template<> struct NPPTypeTraits { typedef Npp32f npp_type; }; - template<> struct NPPTypeTraits { typedef Npp64f npp_type; }; - - class NppStreamHandler - { - public: - inline explicit NppStreamHandler(Stream& newStream) - { - oldStream = nppGetStream(); - nppSetStream(StreamAccessor::getStream(newStream)); - } - - inline explicit NppStreamHandler(cudaStream_t newStream) - { - oldStream = nppGetStream(); - nppSetStream(newStream); - } - - inline ~NppStreamHandler() - { - nppSetStream(oldStream); - } - - private: - cudaStream_t oldStream; - }; -}} - -#define nppSafeCall(expr) cv::cuda::checkNppError(expr, __FILE__, __LINE__, CV_Func) -#define cuSafeCall(expr) cv::cuda::checkCudaDriverApiError(expr, __FILE__, __LINE__, CV_Func) - -#endif // HAVE_CUDA - -//! @endcond - -#endif // __OPENCV_CORE_CUDA_PRIVATE_HPP__ diff --git a/IPL/include/opencv/opencv2/core/private.hpp b/IPL/include/opencv/opencv2/core/private.hpp deleted file mode 100644 index c71ec62..0000000 --- a/IPL/include/opencv/opencv2/core/private.hpp +++ /dev/null @@ -1,425 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_CORE_PRIVATE_HPP__ -#define __OPENCV_CORE_PRIVATE_HPP__ - -#ifndef __OPENCV_BUILD -# error this is a private header which should not be used from outside of the OpenCV library -#endif - -#include "opencv2/core.hpp" -#include "cvconfig.h" - -#ifdef HAVE_EIGEN -# if defined __GNUC__ && defined __APPLE__ -# pragma GCC diagnostic ignored "-Wshadow" -# endif -# include -# include "opencv2/core/eigen.hpp" -#endif - -#ifdef HAVE_TBB -# include "tbb/tbb_stddef.h" -# if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202 -# include "tbb/tbb.h" -# include "tbb/task.h" -# undef min -# undef max -# else -# undef HAVE_TBB -# endif -#endif - -//! @cond IGNORED - -namespace cv -{ -#ifdef HAVE_TBB - - typedef tbb::blocked_range BlockedRange; - - template static inline - void parallel_for( const BlockedRange& range, const Body& body ) - { - tbb::parallel_for(range, body); - } - - typedef tbb::split Split; - - template static inline - void parallel_reduce( const BlockedRange& range, Body& body ) - { - tbb::parallel_reduce(range, body); - } - - typedef tbb::concurrent_vector ConcurrentRectVector; -#else - class BlockedRange - { - public: - BlockedRange() : _begin(0), _end(0), _grainsize(0) {} - BlockedRange(int b, int e, int g=1) : _begin(b), _end(e), _grainsize(g) {} - int begin() const { return _begin; } - int end() const { return _end; } - int grainsize() const { return _grainsize; } - - protected: - int _begin, _end, _grainsize; - }; - - template static inline - void parallel_for( const BlockedRange& range, const Body& body ) - { - body(range); - } - typedef std::vector ConcurrentRectVector; - - class Split {}; - - template static inline - void parallel_reduce( const BlockedRange& range, Body& body ) - { - body(range); - } -#endif - - // Returns a static string if there is a parallel framework, - // NULL otherwise. - CV_EXPORTS const char* currentParallelFramework(); -} //namespace cv - -/****************************************************************************************\ -* Common declarations * -\****************************************************************************************/ - -/* the alignment of all the allocated buffers */ -#define CV_MALLOC_ALIGN 16 - -/* IEEE754 constants and macros */ -#define CV_TOGGLE_FLT(x) ((x)^((int)(x) < 0 ? 0x7fffffff : 0)) -#define CV_TOGGLE_DBL(x) ((x)^((int64)(x) < 0 ? CV_BIG_INT(0x7fffffffffffffff) : 0)) - -static inline void* cvAlignPtr( const void* ptr, int align = 32 ) -{ - CV_DbgAssert ( (align & (align-1)) == 0 ); - return (void*)( ((size_t)ptr + align - 1) & ~(size_t)(align-1) ); -} - -static inline int cvAlign( int size, int align ) -{ - CV_DbgAssert( (align & (align-1)) == 0 && size < INT_MAX ); - return (size + align - 1) & -align; -} - -#ifdef IPL_DEPTH_8U -static inline cv::Size cvGetMatSize( const CvMat* mat ) -{ - return cv::Size(mat->cols, mat->rows); -} -#endif - -namespace cv -{ -CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int unroll_to = 0); -} - -// property implementation macros - -#define CV_IMPL_PROPERTY_RO(type, name, member) \ - inline type get##name() const { return member; } - -#define CV_HELP_IMPL_PROPERTY(r_type, w_type, name, member) \ - CV_IMPL_PROPERTY_RO(r_type, name, member) \ - inline void set##name(w_type val) { member = val; } - -#define CV_HELP_WRAP_PROPERTY(r_type, w_type, name, internal_name, internal_obj) \ - r_type get##name() const { return internal_obj.get##internal_name(); } \ - void set##name(w_type val) { internal_obj.set##internal_name(val); } - -#define CV_IMPL_PROPERTY(type, name, member) CV_HELP_IMPL_PROPERTY(type, type, name, member) -#define CV_IMPL_PROPERTY_S(type, name, member) CV_HELP_IMPL_PROPERTY(type, const type &, name, member) - -#define CV_WRAP_PROPERTY(type, name, internal_name, internal_obj) CV_HELP_WRAP_PROPERTY(type, type, name, internal_name, internal_obj) -#define CV_WRAP_PROPERTY_S(type, name, internal_name, internal_obj) CV_HELP_WRAP_PROPERTY(type, const type &, name, internal_name, internal_obj) - -#define CV_WRAP_SAME_PROPERTY(type, name, internal_obj) CV_WRAP_PROPERTY(type, name, name, internal_obj) -#define CV_WRAP_SAME_PROPERTY_S(type, name, internal_obj) CV_WRAP_PROPERTY_S(type, name, name, internal_obj) - -/****************************************************************************************\ -* Structures and macros for integration with IPP * -\****************************************************************************************/ - -#ifdef HAVE_IPP -#include "ipp.h" - -#ifndef IPP_VERSION_UPDATE // prior to 7.1 -#define IPP_VERSION_UPDATE 0 -#endif - -#define IPP_VERSION_X100 (IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR*10 + IPP_VERSION_UPDATE) - -// General define for ipp function disabling -#define IPP_DISABLE_BLOCK 0 - -#ifdef CV_MALLOC_ALIGN -#undef CV_MALLOC_ALIGN -#endif -#define CV_MALLOC_ALIGN 32 // required for AVX optimization - -#define setIppErrorStatus() cv::ipp::setIppStatus(-1, CV_Func, __FILE__, __LINE__) - -static inline IppiSize ippiSize(int width, int height) -{ - IppiSize size = { width, height }; - return size; -} - -static inline IppiSize ippiSize(const cv::Size & _size) -{ - IppiSize size = { _size.width, _size.height }; - return size; -} - -static inline IppiBorderType ippiGetBorderType(int borderTypeNI) -{ - return borderTypeNI == cv::BORDER_CONSTANT ? ippBorderConst : - borderTypeNI == cv::BORDER_WRAP ? ippBorderWrap : - borderTypeNI == cv::BORDER_REPLICATE ? ippBorderRepl : - borderTypeNI == cv::BORDER_REFLECT_101 ? ippBorderMirror : - borderTypeNI == cv::BORDER_REFLECT ? ippBorderMirrorR : (IppiBorderType)-1; -} - -static inline IppDataType ippiGetDataType(int depth) -{ - return depth == CV_8U ? ipp8u : - depth == CV_8S ? ipp8s : - depth == CV_16U ? ipp16u : - depth == CV_16S ? ipp16s : - depth == CV_32S ? ipp32s : - depth == CV_32F ? ipp32f : - depth == CV_64F ? ipp64f : (IppDataType)-1; -} - -// IPP temporary buffer hepler -template -class IppAutoBuffer -{ -public: - IppAutoBuffer() { m_pBuffer = NULL; } - IppAutoBuffer(int size) { Alloc(size); } - ~IppAutoBuffer() { Release(); } - T* Alloc(int size) { m_pBuffer = (T*)ippMalloc(size); return m_pBuffer; } - void Release() { if(m_pBuffer) ippFree(m_pBuffer); } - inline operator T* () { return (T*)m_pBuffer;} - inline operator const T* () const { return (const T*)m_pBuffer;} -private: - // Disable copy operations - IppAutoBuffer(IppAutoBuffer &) {}; - IppAutoBuffer& operator =(const IppAutoBuffer &) {return *this;}; - - T* m_pBuffer; -}; - -#else -#define IPP_VERSION_X100 0 -#endif - -// There shoud be no API difference in OpenCV between ICV and IPP since 9.0 -#if (defined HAVE_IPP_ICV_ONLY) && IPP_VERSION_X100 >= 900 -#undef HAVE_IPP_ICV_ONLY -#endif - -#ifdef HAVE_IPP_ICV_ONLY -#define HAVE_ICV 1 -#else -#define HAVE_ICV 0 -#endif - -#if defined HAVE_IPP -#if IPP_VERSION_X100 >= 900 -#define IPP_INITIALIZER(FEAT) \ -{ \ - if(FEAT) \ - ippSetCpuFeatures(FEAT); \ - else \ - ippInit(); \ -} -#elif IPP_VERSION_X100 >= 800 -#define IPP_INITIALIZER(FEAT) \ -{ \ - ippInit(); \ -} -#else -#define IPP_INITIALIZER(FEAT) \ -{ \ - ippStaticInit(); \ -} -#endif - -#ifdef CVAPI_EXPORTS -#define IPP_INITIALIZER_AUTO \ -struct __IppInitializer__ \ -{ \ - __IppInitializer__() \ - {IPP_INITIALIZER(cv::ipp::getIppFeatures())} \ -}; \ -static struct __IppInitializer__ __ipp_initializer__; -#else -#define IPP_INITIALIZER_AUTO -#endif -#else -#define IPP_INITIALIZER -#define IPP_INITIALIZER_AUTO -#endif - -#define CV_IPP_CHECK_COND (cv::ipp::useIPP()) -#define CV_IPP_CHECK() if(CV_IPP_CHECK_COND) - -#ifdef HAVE_IPP - -#ifdef CV_IPP_RUN_VERBOSE -#define CV_IPP_RUN_(condition, func, ...) \ - { \ - if (cv::ipp::useIPP() && (condition) && func) \ - { \ - printf("%s: IPP implementation is running\n", CV_Func); \ - fflush(stdout); \ - CV_IMPL_ADD(CV_IMPL_IPP); \ - return __VA_ARGS__; \ - } \ - else \ - { \ - printf("%s: Plain implementation is running\n", CV_Func); \ - fflush(stdout); \ - } \ - } -#elif defined CV_IPP_RUN_ASSERT -#define CV_IPP_RUN_(condition, func, ...) \ - { \ - if (cv::ipp::useIPP() && (condition)) \ - { \ - if(func) \ - { \ - CV_IMPL_ADD(CV_IMPL_IPP); \ - } \ - else \ - { \ - setIppErrorStatus(); \ - CV_Error(cv::Error::StsAssert, #func); \ - } \ - return __VA_ARGS__; \ - } \ - } -#else -#define CV_IPP_RUN_(condition, func, ...) \ - if (cv::ipp::useIPP() && (condition) && func) \ - { \ - CV_IMPL_ADD(CV_IMPL_IPP); \ - return __VA_ARGS__; \ - } -#endif - -#else -#define CV_IPP_RUN_(condition, func, ...) -#endif - -#define CV_IPP_RUN(condition, func, ...) CV_IPP_RUN_(condition, func, __VA_ARGS__) - - -#ifndef IPPI_CALL -# define IPPI_CALL(func) CV_Assert((func) >= 0) -#endif - -/* IPP-compatible return codes */ -typedef enum CvStatus -{ - CV_BADMEMBLOCK_ERR = -113, - CV_INPLACE_NOT_SUPPORTED_ERR= -112, - CV_UNMATCHED_ROI_ERR = -111, - CV_NOTFOUND_ERR = -110, - CV_BADCONVERGENCE_ERR = -109, - - CV_BADDEPTH_ERR = -107, - CV_BADROI_ERR = -106, - CV_BADHEADER_ERR = -105, - CV_UNMATCHED_FORMATS_ERR = -104, - CV_UNSUPPORTED_COI_ERR = -103, - CV_UNSUPPORTED_CHANNELS_ERR = -102, - CV_UNSUPPORTED_DEPTH_ERR = -101, - CV_UNSUPPORTED_FORMAT_ERR = -100, - - CV_BADARG_ERR = -49, //ipp comp - CV_NOTDEFINED_ERR = -48, //ipp comp - - CV_BADCHANNELS_ERR = -47, //ipp comp - CV_BADRANGE_ERR = -44, //ipp comp - CV_BADSTEP_ERR = -29, //ipp comp - - CV_BADFLAG_ERR = -12, - CV_DIV_BY_ZERO_ERR = -11, //ipp comp - CV_BADCOEF_ERR = -10, - - CV_BADFACTOR_ERR = -7, - CV_BADPOINT_ERR = -6, - CV_BADSCALE_ERR = -4, - CV_OUTOFMEM_ERR = -3, - CV_NULLPTR_ERR = -2, - CV_BADSIZE_ERR = -1, - CV_NO_ERR = 0, - CV_OK = CV_NO_ERR -} -CvStatus; - -#ifdef HAVE_TEGRA_OPTIMIZATION -namespace tegra { - -CV_EXPORTS bool useTegra(); -CV_EXPORTS void setUseTegra(bool flag); - -} -#endif - -//! @endcond - -#endif // __OPENCV_CORE_PRIVATE_HPP__ diff --git a/IPL/include/opencv/opencv2/core/ptr.inl.hpp b/IPL/include/opencv/opencv2/core/ptr.inl.hpp deleted file mode 100644 index 3f6f214..0000000 --- a/IPL/include/opencv/opencv2/core/ptr.inl.hpp +++ /dev/null @@ -1,365 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2013, NVIDIA Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the copyright holders or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_CORE_PTR_INL_HPP__ -#define __OPENCV_CORE_PTR_INL_HPP__ - -#include - -//! @cond IGNORED - -namespace cv { - -template -void DefaultDeleter::operator () (Y* p) const -{ - delete p; -} - -namespace detail -{ - -struct PtrOwner -{ - PtrOwner() : refCount(1) - {} - - void incRef() - { - CV_XADD(&refCount, 1); - } - - void decRef() - { - if (CV_XADD(&refCount, -1) == 1) deleteSelf(); - } - -protected: - /* This doesn't really need to be virtual, since PtrOwner is never deleted - directly, but it doesn't hurt and it helps avoid warnings. */ - virtual ~PtrOwner() - {} - - virtual void deleteSelf() = 0; - -private: - unsigned int refCount; - - // noncopyable - PtrOwner(const PtrOwner&); - PtrOwner& operator = (const PtrOwner&); -}; - -template -struct PtrOwnerImpl : PtrOwner -{ - PtrOwnerImpl(Y* p, D d) : owned(p), deleter(d) - {} - - void deleteSelf() - { - deleter(owned); - delete this; - } - -private: - Y* owned; - D deleter; -}; - - -} - -template -Ptr::Ptr() : owner(NULL), stored(NULL) -{} - -template -template -Ptr::Ptr(Y* p) - : owner(p - ? new detail::PtrOwnerImpl >(p, DefaultDeleter()) - : NULL), - stored(p) -{} - -template -template -Ptr::Ptr(Y* p, D d) - : owner(p - ? new detail::PtrOwnerImpl(p, d) - : NULL), - stored(p) -{} - -template -Ptr::Ptr(const Ptr& o) : owner(o.owner), stored(o.stored) -{ - if (owner) owner->incRef(); -} - -template -template -Ptr::Ptr(const Ptr& o) : owner(o.owner), stored(o.stored) -{ - if (owner) owner->incRef(); -} - -template -template -Ptr::Ptr(const Ptr& o, T* p) : owner(o.owner), stored(p) -{ - if (owner) owner->incRef(); -} - -template -Ptr::~Ptr() -{ - release(); -} - -template -Ptr& Ptr::operator = (const Ptr& o) -{ - Ptr(o).swap(*this); - return *this; -} - -template -template -Ptr& Ptr::operator = (const Ptr& o) -{ - Ptr(o).swap(*this); - return *this; -} - -template -void Ptr::release() -{ - if (owner) owner->decRef(); - owner = NULL; - stored = NULL; -} - -template -template -void Ptr::reset(Y* p) -{ - Ptr(p).swap(*this); -} - -template -template -void Ptr::reset(Y* p, D d) -{ - Ptr(p, d).swap(*this); -} - -template -void Ptr::swap(Ptr& o) -{ - std::swap(owner, o.owner); - std::swap(stored, o.stored); -} - -template -T* Ptr::get() const -{ - return stored; -} - -template -typename detail::RefOrVoid::type Ptr::operator * () const -{ - return *stored; -} - -template -T* Ptr::operator -> () const -{ - return stored; -} - -template -Ptr::operator T* () const -{ - return stored; -} - - -template -bool Ptr::empty() const -{ - return !stored; -} - -template -template -Ptr Ptr::staticCast() const -{ - return Ptr(*this, static_cast(stored)); -} - -template -template -Ptr Ptr::constCast() const -{ - return Ptr(*this, const_cast(stored)); -} - -template -template -Ptr Ptr::dynamicCast() const -{ - return Ptr(*this, dynamic_cast(stored)); -} - -#ifdef CV_CXX_MOVE_SEMANTICS - -template -Ptr::Ptr(Ptr&& o) : owner(o.owner), stored(o.stored) -{ - o.owner = NULL; - o.stored = NULL; -} - -template -Ptr& Ptr::operator = (Ptr&& o) -{ - release(); - owner = o.owner; - stored = o.stored; - o.owner = NULL; - o.stored = NULL; - return *this; -} - -#endif - - -template -void swap(Ptr& ptr1, Ptr& ptr2){ - ptr1.swap(ptr2); -} - -template -bool operator == (const Ptr& ptr1, const Ptr& ptr2) -{ - return ptr1.get() == ptr2.get(); -} - -template -bool operator != (const Ptr& ptr1, const Ptr& ptr2) -{ - return ptr1.get() != ptr2.get(); -} - -template -Ptr makePtr() -{ - return Ptr(new T()); -} - -template -Ptr makePtr(const A1& a1) -{ - return Ptr(new T(a1)); -} - -template -Ptr makePtr(const A1& a1, const A2& a2) -{ - return Ptr(new T(a1, a2)); -} - -template -Ptr makePtr(const A1& a1, const A2& a2, const A3& a3) -{ - return Ptr(new T(a1, a2, a3)); -} - -template -Ptr makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4) -{ - return Ptr(new T(a1, a2, a3, a4)); -} - -template -Ptr makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5) -{ - return Ptr(new T(a1, a2, a3, a4, a5)); -} - -template -Ptr makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6) -{ - return Ptr(new T(a1, a2, a3, a4, a5, a6)); -} - -template -Ptr makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7) -{ - return Ptr(new T(a1, a2, a3, a4, a5, a6, a7)); -} - -template -Ptr makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8) -{ - return Ptr(new T(a1, a2, a3, a4, a5, a6, a7, a8)); -} - -template -Ptr makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9) -{ - return Ptr(new T(a1, a2, a3, a4, a5, a6, a7, a8, a9)); -} - -template -Ptr makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const A10& a10) -{ - return Ptr(new T(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10)); -} - -} // namespace cv - -//! @endcond - -#endif // __OPENCV_CORE_PTR_INL_HPP__ diff --git a/IPL/include/opencv/opencv2/core/saturate.hpp b/IPL/include/opencv/opencv2/core/saturate.hpp index 1442eab..8127e3d 100644 --- a/IPL/include/opencv/opencv2/core/saturate.hpp +++ b/IPL/include/opencv/opencv2/core/saturate.hpp @@ -42,8 +42,8 @@ // //M*/ -#ifndef __OPENCV_CORE_SATURATE_HPP__ -#define __OPENCV_CORE_SATURATE_HPP__ +#ifndef OPENCV_CORE_SATURATE_HPP +#define OPENCV_CORE_SATURATE_HPP #include "opencv2/core/cvdef.h" #include "opencv2/core/fast_math.hpp" @@ -58,8 +58,8 @@ namespace cv /** @brief Template function for accurate conversion from one primitive type to another. - The functions saturate_cast resemble the standard C++ cast operations, such as static_cast\() - and others. They perform an efficient and accurate conversion from one primitive type to another + The function saturate_cast resembles the standard C++ cast operations, such as static_cast\() + and others. It perform an efficient and accurate conversion from one primitive type to another (see the introduction chapter). saturate in the name means that when the input value v is out of the range of the target type, the result is not formed just by taking low bits of the input, but instead the value is clipped. For example: @@ -74,8 +74,6 @@ namespace cv the floating-point value is first rounded to the nearest integer and then clipped if needed (when the target type is 8- or 16-bit). - This operation is used in the simplest or most complex image processing functions in OpenCV. - @param v Function parameter. @sa add, subtract, multiply, divide, Mat::convertTo */ @@ -136,15 +134,46 @@ template<> inline short saturate_cast(double v) { int iv = cvRound( template<> inline short saturate_cast(int64 v) { return (short)((uint64)((int64)v - SHRT_MIN) <= (uint64)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); } template<> inline short saturate_cast(uint64 v) { return (short)std::min(v, (uint64)SHRT_MAX); } +template<> inline int saturate_cast(unsigned v) { return (int)std::min(v, (unsigned)INT_MAX); } +template<> inline int saturate_cast(int64 v) { return (int)((uint64)(v - INT_MIN) <= (uint64)UINT_MAX ? v : v > 0 ? INT_MAX : INT_MIN); } +template<> inline int saturate_cast(uint64 v) { return (int)std::min(v, (uint64)INT_MAX); } template<> inline int saturate_cast(float v) { return cvRound(v); } template<> inline int saturate_cast(double v) { return cvRound(v); } +template<> inline unsigned saturate_cast(schar v) { return (unsigned)std::max(v, (schar)0); } +template<> inline unsigned saturate_cast(short v) { return (unsigned)std::max(v, (short)0); } +template<> inline unsigned saturate_cast(int v) { return (unsigned)std::max(v, (int)0); } +template<> inline unsigned saturate_cast(int64 v) { return (unsigned)((uint64)v <= (uint64)UINT_MAX ? v : v > 0 ? UINT_MAX : 0); } +template<> inline unsigned saturate_cast(uint64 v) { return (unsigned)std::min(v, (uint64)UINT_MAX); } // we intentionally do not clip negative numbers, to make -1 become 0xffffffff etc. -template<> inline unsigned saturate_cast(float v) { return cvRound(v); } -template<> inline unsigned saturate_cast(double v) { return cvRound(v); } +template<> inline unsigned saturate_cast(float v) { return static_cast(cvRound(v)); } +template<> inline unsigned saturate_cast(double v) { return static_cast(cvRound(v)); } + +template<> inline uint64 saturate_cast(schar v) { return (uint64)std::max(v, (schar)0); } +template<> inline uint64 saturate_cast(short v) { return (uint64)std::max(v, (short)0); } +template<> inline uint64 saturate_cast(int v) { return (uint64)std::max(v, (int)0); } +template<> inline uint64 saturate_cast(int64 v) { return (uint64)std::max(v, (int64)0); } + +template<> inline int64 saturate_cast(uint64 v) { return (int64)std::min(v, (uint64)LLONG_MAX); } + +/** @overload */ +template static inline _Tp saturate_cast(float16_t v) { return saturate_cast<_Tp>((float)v); } + +// in theory, we could use a LUT for 8u/8s->16f conversion, +// but with hardware support for FP32->FP16 conversion the current approach is preferable +template<> inline float16_t saturate_cast(uchar v) { return float16_t((float)v); } +template<> inline float16_t saturate_cast(schar v) { return float16_t((float)v); } +template<> inline float16_t saturate_cast(ushort v) { return float16_t((float)v); } +template<> inline float16_t saturate_cast(short v) { return float16_t((float)v); } +template<> inline float16_t saturate_cast(unsigned v){ return float16_t((float)v); } +template<> inline float16_t saturate_cast(int v) { return float16_t((float)v); } +template<> inline float16_t saturate_cast(uint64 v) { return float16_t((float)v); } +template<> inline float16_t saturate_cast(int64 v) { return float16_t((float)v); } +template<> inline float16_t saturate_cast(float v) { return float16_t(v); } +template<> inline float16_t saturate_cast(double v) { return float16_t((float)v); } //! @} } // cv -#endif // __OPENCV_CORE_SATURATE_HPP__ +#endif // OPENCV_CORE_SATURATE_HPP diff --git a/IPL/include/opencv/opencv2/core/simd_intrinsics.hpp b/IPL/include/opencv/opencv2/core/simd_intrinsics.hpp new file mode 100644 index 0000000..c50923f --- /dev/null +++ b/IPL/include/opencv/opencv2/core/simd_intrinsics.hpp @@ -0,0 +1,88 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_SIMD_INTRINSICS_HPP +#define OPENCV_CORE_SIMD_INTRINSICS_HPP + +/** +Helper header to support SIMD intrinsics (universal intrinsics) in user code. +Intrinsics documentation: https://docs.opencv.org/master/df/d91/group__core__hal__intrin.html + + +Checks of target CPU instruction set based on compiler definitions don't work well enough. +More reliable solutions require utilization of configuration systems (like CMake). + +So, probably you need to specify your own configuration. + +You can do that via CMake in this way: + add_definitions(/DOPENCV_SIMD_CONFIG_HEADER=opencv_simd_config_custom.hpp) +or + add_definitions(/DOPENCV_SIMD_CONFIG_INCLUDE_DIR=1) + +Additionally you may need to add include directory to your files: + include_directories("${CMAKE_CURRENT_LIST_DIR}/opencv_config_${MYTARGET}") + +These files can be pre-generated for target configurations of your application +or generated by CMake on the fly (use CMAKE_BINARY_DIR for that). + +Notes: +- H/W capability checks are still responsibility of your application +- runtime dispatching is not covered by this helper header +*/ + +#ifdef __OPENCV_BUILD +#error "Use core/hal/intrin.hpp during OpenCV build" +#endif + +#ifdef OPENCV_HAL_INTRIN_HPP +#error "core/simd_intrinsics.hpp must be included before core/hal/intrin.hpp" +#endif + +#include "opencv2/core/cvdef.h" +#include "opencv2/core/version.hpp" + +#ifdef OPENCV_SIMD_CONFIG_HEADER +#include CVAUX_STR(OPENCV_SIMD_CONFIG_HEADER) +#elif defined(OPENCV_SIMD_CONFIG_INCLUDE_DIR) +#include "opencv_simd_config.hpp" // corresponding directory should be added via -I compiler parameter +#else // custom config headers + +#if (!defined(CV_AVX_512F) || !CV_AVX_512F) && (defined(__AVX512__) || defined(__AVX512F__)) +# include +# undef CV_AVX_512F +# define CV_AVX_512F 1 +# ifndef OPENCV_SIMD_DONT_ASSUME_SKX // Skylake-X with AVX-512F/CD/BW/DQ/VL +# undef CV_AVX512_SKX +# define CV_AVX512_SKX 1 +# undef CV_AVX_512CD +# define CV_AVX_512CD 1 +# undef CV_AVX_512BW +# define CV_AVX_512BW 1 +# undef CV_AVX_512DQ +# define CV_AVX_512DQ 1 +# undef CV_AVX_512VL +# define CV_AVX_512VL 1 +# endif +#endif // AVX512 + +// GCC/Clang: -mavx2 +// MSVC: /arch:AVX2 +#if defined __AVX2__ +# include +# undef CV_AVX2 +# define CV_AVX2 1 +# if defined __F16C__ +# undef CV_FP16 +# define CV_FP16 1 +# endif +#endif + +#endif + +// SSE / NEON / VSX is handled by cv_cpu_dispatch.h compatibility block +#include "cv_cpu_dispatch.h" + +#include "hal/intrin.hpp" + +#endif // OPENCV_CORE_SIMD_INTRINSICS_HPP diff --git a/IPL/include/opencv/opencv2/core/softfloat.hpp b/IPL/include/opencv/opencv2/core/softfloat.hpp new file mode 100644 index 0000000..485e15c --- /dev/null +++ b/IPL/include/opencv/opencv2/core/softfloat.hpp @@ -0,0 +1,514 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +// This file is based on files from package issued with the following license: + +/*============================================================================ + +This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3c, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#pragma once +#ifndef softfloat_h +#define softfloat_h 1 + +#include "cvdef.h" + +namespace cv +{ + +/** @addtogroup core_utils_softfloat + + [SoftFloat](http://www.jhauser.us/arithmetic/SoftFloat.html) is a software implementation + of floating-point calculations according to IEEE 754 standard. + All calculations are done in integers, that's why they are machine-independent and bit-exact. + This library can be useful in accuracy-critical parts like look-up tables generation, tests, etc. + OpenCV contains a subset of SoftFloat partially rewritten to C++. + + ### Types + + There are two basic types: @ref softfloat and @ref softdouble. + These types are binary compatible with float and double types respectively + and support conversions to/from them. + Other types from original SoftFloat library like fp16 or fp128 were thrown away + as well as quiet/signaling NaN support, on-the-fly rounding mode switch + and exception flags (though exceptions can be implemented in the future). + + ### Operations + + Both types support the following: + - Construction from signed and unsigned 32-bit and 64 integers, + float/double or raw binary representation + - Conversions between each other, to float or double and to int + using @ref cvRound, @ref cvTrunc, @ref cvFloor, @ref cvCeil or a bunch of + saturate_cast functions + - Add, subtract, multiply, divide, remainder, square root, FMA with absolute precision + - Comparison operations + - Explicit sign, exponent and significand manipulation through get/set methods, + number state indicators (isInf, isNan, isSubnormal) + - Type-specific constants like eps, minimum/maximum value, best pi approximation, etc. + - min(), max(), abs(), exp(), log() and pow() functions + +*/ +//! @{ + +struct softfloat; +struct softdouble; + +struct CV_EXPORTS softfloat +{ +public: + /** @brief Default constructor */ + softfloat() { v = 0; } + /** @brief Copy constructor */ + softfloat( const softfloat& c) { v = c.v; } + /** @brief Assign constructor */ + softfloat& operator=( const softfloat& c ) + { + if(&c != this) v = c.v; + return *this; + } + /** @brief Construct from raw + + Builds new value from raw binary representation + */ + static const softfloat fromRaw( const uint32_t a ) { softfloat x; x.v = a; return x; } + + /** @brief Construct from integer */ + explicit softfloat( const uint32_t ); + explicit softfloat( const uint64_t ); + explicit softfloat( const int32_t ); + explicit softfloat( const int64_t ); + +#ifdef CV_INT32_T_IS_LONG_INT + // for platforms with int32_t = long int + explicit softfloat( const int a ) { *this = softfloat(static_cast(a)); } +#endif + + /** @brief Construct from float */ + explicit softfloat( const float a ) { Cv32suf s; s.f = a; v = s.u; } + + /** @brief Type casts */ + operator softdouble() const; + operator float() const { Cv32suf s; s.u = v; return s.f; } + + /** @brief Basic arithmetics */ + softfloat operator + (const softfloat&) const; + softfloat operator - (const softfloat&) const; + softfloat operator * (const softfloat&) const; + softfloat operator / (const softfloat&) const; + softfloat operator - () const { softfloat x; x.v = v ^ (1U << 31); return x; } + + /** @brief Remainder operator + + A quote from original SoftFloat manual: + + > The IEEE Standard remainder operation computes the value + > a - n * b, where n is the integer closest to a / b. + > If a / b is exactly halfway between two integers, n is the even integer + > closest to a / b. The IEEE Standard’s remainder operation is always exact and so requires no rounding. + > Depending on the relative magnitudes of the operands, the remainder functions + > can take considerably longer to execute than the other SoftFloat functions. + > This is an inherent characteristic of the remainder operation itself and is not a flaw + > in the SoftFloat implementation. + */ + softfloat operator % (const softfloat&) const; + + softfloat& operator += (const softfloat& a) { *this = *this + a; return *this; } + softfloat& operator -= (const softfloat& a) { *this = *this - a; return *this; } + softfloat& operator *= (const softfloat& a) { *this = *this * a; return *this; } + softfloat& operator /= (const softfloat& a) { *this = *this / a; return *this; } + softfloat& operator %= (const softfloat& a) { *this = *this % a; return *this; } + + /** @brief Comparison operations + + - Any operation with NaN produces false + + The only exception is when x is NaN: x != y for any y. + - Positive and negative zeros are equal + */ + bool operator == ( const softfloat& ) const; + bool operator != ( const softfloat& ) const; + bool operator > ( const softfloat& ) const; + bool operator >= ( const softfloat& ) const; + bool operator < ( const softfloat& ) const; + bool operator <= ( const softfloat& ) const; + + /** @brief NaN state indicator */ + inline bool isNaN() const { return (v & 0x7fffffff) > 0x7f800000; } + /** @brief Inf state indicator */ + inline bool isInf() const { return (v & 0x7fffffff) == 0x7f800000; } + /** @brief Subnormal number indicator */ + inline bool isSubnormal() const { return ((v >> 23) & 0xFF) == 0; } + + /** @brief Get sign bit */ + inline bool getSign() const { return (v >> 31) != 0; } + /** @brief Construct a copy with new sign bit */ + inline softfloat setSign(bool sign) const { softfloat x; x.v = (v & ((1U << 31) - 1)) | ((uint32_t)sign << 31); return x; } + /** @brief Get 0-based exponent */ + inline int getExp() const { return ((v >> 23) & 0xFF) - 127; } + /** @brief Construct a copy with new 0-based exponent */ + inline softfloat setExp(int e) const { softfloat x; x.v = (v & 0x807fffff) | (((e + 127) & 0xFF) << 23 ); return x; } + + /** @brief Get a fraction part + + Returns a number 1 <= x < 2 with the same significand + */ + inline softfloat getFrac() const + { + uint_fast32_t vv = (v & 0x007fffff) | (127 << 23); + return softfloat::fromRaw(vv); + } + /** @brief Construct a copy with provided significand + + Constructs a copy of a number with significand taken from parameter + */ + inline softfloat setFrac(const softfloat& s) const + { + softfloat x; + x.v = (v & 0xff800000) | (s.v & 0x007fffff); + return x; + } + + /** @brief Zero constant */ + static softfloat zero() { return softfloat::fromRaw( 0 ); } + /** @brief Positive infinity constant */ + static softfloat inf() { return softfloat::fromRaw( 0xFF << 23 ); } + /** @brief Default NaN constant */ + static softfloat nan() { return softfloat::fromRaw( 0x7fffffff ); } + /** @brief One constant */ + static softfloat one() { return softfloat::fromRaw( 127 << 23 ); } + /** @brief Smallest normalized value */ + static softfloat min() { return softfloat::fromRaw( 0x01 << 23 ); } + /** @brief Difference between 1 and next representable value */ + static softfloat eps() { return softfloat::fromRaw( (127 - 23) << 23 ); } + /** @brief Biggest finite value */ + static softfloat max() { return softfloat::fromRaw( (0xFF << 23) - 1 ); } + /** @brief Correct pi approximation */ + static softfloat pi() { return softfloat::fromRaw( 0x40490fdb ); } + + uint32_t v; +}; + +/*---------------------------------------------------------------------------- +*----------------------------------------------------------------------------*/ + +struct CV_EXPORTS softdouble +{ +public: + /** @brief Default constructor */ + softdouble() : v(0) { } + /** @brief Copy constructor */ + softdouble( const softdouble& c) { v = c.v; } + /** @brief Assign constructor */ + softdouble& operator=( const softdouble& c ) + { + if(&c != this) v = c.v; + return *this; + } + /** @brief Construct from raw + + Builds new value from raw binary representation + */ + static softdouble fromRaw( const uint64_t a ) { softdouble x; x.v = a; return x; } + + /** @brief Construct from integer */ + explicit softdouble( const uint32_t ); + explicit softdouble( const uint64_t ); + explicit softdouble( const int32_t ); + explicit softdouble( const int64_t ); + +#ifdef CV_INT32_T_IS_LONG_INT + // for platforms with int32_t = long int + explicit softdouble( const int a ) { *this = softdouble(static_cast(a)); } +#endif + + /** @brief Construct from double */ + explicit softdouble( const double a ) { Cv64suf s; s.f = a; v = s.u; } + + /** @brief Type casts */ + operator softfloat() const; + operator double() const { Cv64suf s; s.u = v; return s.f; } + + /** @brief Basic arithmetics */ + softdouble operator + (const softdouble&) const; + softdouble operator - (const softdouble&) const; + softdouble operator * (const softdouble&) const; + softdouble operator / (const softdouble&) const; + softdouble operator - () const { softdouble x; x.v = v ^ (1ULL << 63); return x; } + + /** @brief Remainder operator + + A quote from original SoftFloat manual: + + > The IEEE Standard remainder operation computes the value + > a - n * b, where n is the integer closest to a / b. + > If a / b is exactly halfway between two integers, n is the even integer + > closest to a / b. The IEEE Standard’s remainder operation is always exact and so requires no rounding. + > Depending on the relative magnitudes of the operands, the remainder functions + > can take considerably longer to execute than the other SoftFloat functions. + > This is an inherent characteristic of the remainder operation itself and is not a flaw + > in the SoftFloat implementation. + */ + softdouble operator % (const softdouble&) const; + + softdouble& operator += (const softdouble& a) { *this = *this + a; return *this; } + softdouble& operator -= (const softdouble& a) { *this = *this - a; return *this; } + softdouble& operator *= (const softdouble& a) { *this = *this * a; return *this; } + softdouble& operator /= (const softdouble& a) { *this = *this / a; return *this; } + softdouble& operator %= (const softdouble& a) { *this = *this % a; return *this; } + + /** @brief Comparison operations + + - Any operation with NaN produces false + + The only exception is when x is NaN: x != y for any y. + - Positive and negative zeros are equal + */ + bool operator == ( const softdouble& ) const; + bool operator != ( const softdouble& ) const; + bool operator > ( const softdouble& ) const; + bool operator >= ( const softdouble& ) const; + bool operator < ( const softdouble& ) const; + bool operator <= ( const softdouble& ) const; + + /** @brief NaN state indicator */ + inline bool isNaN() const { return (v & 0x7fffffffffffffff) > 0x7ff0000000000000; } + /** @brief Inf state indicator */ + inline bool isInf() const { return (v & 0x7fffffffffffffff) == 0x7ff0000000000000; } + /** @brief Subnormal number indicator */ + inline bool isSubnormal() const { return ((v >> 52) & 0x7FF) == 0; } + + /** @brief Get sign bit */ + inline bool getSign() const { return (v >> 63) != 0; } + /** @brief Construct a copy with new sign bit */ + softdouble setSign(bool sign) const { softdouble x; x.v = (v & ((1ULL << 63) - 1)) | ((uint_fast64_t)(sign) << 63); return x; } + /** @brief Get 0-based exponent */ + inline int getExp() const { return ((v >> 52) & 0x7FF) - 1023; } + /** @brief Construct a copy with new 0-based exponent */ + inline softdouble setExp(int e) const + { + softdouble x; + x.v = (v & 0x800FFFFFFFFFFFFF) | ((uint_fast64_t)((e + 1023) & 0x7FF) << 52); + return x; + } + + /** @brief Get a fraction part + + Returns a number 1 <= x < 2 with the same significand + */ + inline softdouble getFrac() const + { + uint_fast64_t vv = (v & 0x000FFFFFFFFFFFFF) | ((uint_fast64_t)(1023) << 52); + return softdouble::fromRaw(vv); + } + /** @brief Construct a copy with provided significand + + Constructs a copy of a number with significand taken from parameter + */ + inline softdouble setFrac(const softdouble& s) const + { + softdouble x; + x.v = (v & 0xFFF0000000000000) | (s.v & 0x000FFFFFFFFFFFFF); + return x; + } + + /** @brief Zero constant */ + static softdouble zero() { return softdouble::fromRaw( 0 ); } + /** @brief Positive infinity constant */ + static softdouble inf() { return softdouble::fromRaw( (uint_fast64_t)(0x7FF) << 52 ); } + /** @brief Default NaN constant */ + static softdouble nan() { return softdouble::fromRaw( CV_BIG_INT(0x7FFFFFFFFFFFFFFF) ); } + /** @brief One constant */ + static softdouble one() { return softdouble::fromRaw( (uint_fast64_t)( 1023) << 52 ); } + /** @brief Smallest normalized value */ + static softdouble min() { return softdouble::fromRaw( (uint_fast64_t)( 0x01) << 52 ); } + /** @brief Difference between 1 and next representable value */ + static softdouble eps() { return softdouble::fromRaw( (uint_fast64_t)( 1023 - 52 ) << 52 ); } + /** @brief Biggest finite value */ + static softdouble max() { return softdouble::fromRaw( ((uint_fast64_t)(0x7FF) << 52) - 1 ); } + /** @brief Correct pi approximation */ + static softdouble pi() { return softdouble::fromRaw( CV_BIG_INT(0x400921FB54442D18) ); } + + uint64_t v; +}; + +/*---------------------------------------------------------------------------- +*----------------------------------------------------------------------------*/ + +/** @brief Fused Multiplication and Addition + +Computes (a*b)+c with single rounding +*/ +CV_EXPORTS softfloat mulAdd( const softfloat& a, const softfloat& b, const softfloat & c); +CV_EXPORTS softdouble mulAdd( const softdouble& a, const softdouble& b, const softdouble& c); + +/** @brief Square root */ +CV_EXPORTS softfloat sqrt( const softfloat& a ); +CV_EXPORTS softdouble sqrt( const softdouble& a ); +} + +/*---------------------------------------------------------------------------- +| Ported from OpenCV and added for usability +*----------------------------------------------------------------------------*/ + +/** @brief Truncates number to integer with minimum magnitude */ +CV_EXPORTS int cvTrunc(const cv::softfloat& a); +CV_EXPORTS int cvTrunc(const cv::softdouble& a); + +/** @brief Rounds a number to nearest even integer */ +CV_EXPORTS int cvRound(const cv::softfloat& a); +CV_EXPORTS int cvRound(const cv::softdouble& a); + +/** @brief Rounds a number to nearest even long long integer */ +CV_EXPORTS int64_t cvRound64(const cv::softdouble& a); + +/** @brief Rounds a number down to integer */ +CV_EXPORTS int cvFloor(const cv::softfloat& a); +CV_EXPORTS int cvFloor(const cv::softdouble& a); + +/** @brief Rounds number up to integer */ +CV_EXPORTS int cvCeil(const cv::softfloat& a); +CV_EXPORTS int cvCeil(const cv::softdouble& a); + +namespace cv +{ +/** @brief Saturate casts */ +template static inline _Tp saturate_cast(softfloat a) { return _Tp(a); } +template static inline _Tp saturate_cast(softdouble a) { return _Tp(a); } + +template<> inline uchar saturate_cast(softfloat a) { return (uchar)std::max(std::min(cvRound(a), (int)UCHAR_MAX), 0); } +template<> inline uchar saturate_cast(softdouble a) { return (uchar)std::max(std::min(cvRound(a), (int)UCHAR_MAX), 0); } + +template<> inline schar saturate_cast(softfloat a) { return (schar)std::min(std::max(cvRound(a), (int)SCHAR_MIN), (int)SCHAR_MAX); } +template<> inline schar saturate_cast(softdouble a) { return (schar)std::min(std::max(cvRound(a), (int)SCHAR_MIN), (int)SCHAR_MAX); } + +template<> inline ushort saturate_cast(softfloat a) { return (ushort)std::max(std::min(cvRound(a), (int)USHRT_MAX), 0); } +template<> inline ushort saturate_cast(softdouble a) { return (ushort)std::max(std::min(cvRound(a), (int)USHRT_MAX), 0); } + +template<> inline short saturate_cast(softfloat a) { return (short)std::min(std::max(cvRound(a), (int)SHRT_MIN), (int)SHRT_MAX); } +template<> inline short saturate_cast(softdouble a) { return (short)std::min(std::max(cvRound(a), (int)SHRT_MIN), (int)SHRT_MAX); } + +template<> inline int saturate_cast(softfloat a) { return cvRound(a); } +template<> inline int saturate_cast(softdouble a) { return cvRound(a); } + +template<> inline int64_t saturate_cast(softfloat a) { return cvRound(a); } +template<> inline int64_t saturate_cast(softdouble a) { return cvRound64(a); } + +/** @brief Saturate cast to unsigned integer and unsigned long long integer +We intentionally do not clip negative numbers, to make -1 become 0xffffffff etc. +*/ +template<> inline unsigned saturate_cast(softfloat a) { return cvRound(a); } +template<> inline unsigned saturate_cast(softdouble a) { return cvRound(a); } + +template<> inline uint64_t saturate_cast(softfloat a) { return cvRound(a); } +template<> inline uint64_t saturate_cast(softdouble a) { return cvRound64(a); } + +/** @brief Min and Max functions */ +inline softfloat min(const softfloat& a, const softfloat& b) { return (a > b) ? b : a; } +inline softdouble min(const softdouble& a, const softdouble& b) { return (a > b) ? b : a; } + +inline softfloat max(const softfloat& a, const softfloat& b) { return (a > b) ? a : b; } +inline softdouble max(const softdouble& a, const softdouble& b) { return (a > b) ? a : b; } + +/** @brief Absolute value */ +inline softfloat abs( softfloat a) { softfloat x; x.v = a.v & ((1U << 31) - 1); return x; } +inline softdouble abs( softdouble a) { softdouble x; x.v = a.v & ((1ULL << 63) - 1); return x; } + +/** @brief Exponent + +Special cases: +- exp(NaN) is NaN +- exp(-Inf) == 0 +- exp(+Inf) == +Inf +*/ +CV_EXPORTS softfloat exp( const softfloat& a); +CV_EXPORTS softdouble exp( const softdouble& a); + +/** @brief Natural logarithm + +Special cases: +- log(NaN), log(x < 0) are NaN +- log(0) == -Inf +*/ +CV_EXPORTS softfloat log( const softfloat& a ); +CV_EXPORTS softdouble log( const softdouble& a ); + +/** @brief Raising to the power + +Special cases: +- x**NaN is NaN for any x +- ( |x| == 1 )**Inf is NaN +- ( |x| > 1 )**+Inf or ( |x| < 1 )**-Inf is +Inf +- ( |x| > 1 )**-Inf or ( |x| < 1 )**+Inf is 0 +- x ** 0 == 1 for any x +- x ** 1 == 1 for any x +- NaN ** y is NaN for any other y +- Inf**(y < 0) == 0 +- Inf ** y is +Inf for any other y +- (x < 0)**y is NaN for any other y if x can't be correctly rounded to integer +- 0 ** 0 == 1 +- 0 ** (y < 0) is +Inf +- 0 ** (y > 0) is 0 +*/ +CV_EXPORTS softfloat pow( const softfloat& a, const softfloat& b); +CV_EXPORTS softdouble pow( const softdouble& a, const softdouble& b); + +/** @brief Cube root + +Special cases: +- cbrt(NaN) is NaN +- cbrt(+/-Inf) is +/-Inf +*/ +CV_EXPORTS softfloat cbrt( const softfloat& a ); + +/** @brief Sine + +Special cases: +- sin(Inf) or sin(NaN) is NaN +- sin(x) == x when sin(x) is close to zero +*/ +CV_EXPORTS softdouble sin( const softdouble& a ); + +/** @brief Cosine + * +Special cases: +- cos(Inf) or cos(NaN) is NaN +- cos(x) == +/- 1 when cos(x) is close to +/- 1 +*/ +CV_EXPORTS softdouble cos( const softdouble& a ); + +//! @} core_utils_softfloat + +} // cv:: + +#endif diff --git a/IPL/include/opencv/opencv2/core/sse_utils.hpp b/IPL/include/opencv/opencv2/core/sse_utils.hpp index c87b029..0906583 100644 --- a/IPL/include/opencv/opencv2/core/sse_utils.hpp +++ b/IPL/include/opencv/opencv2/core/sse_utils.hpp @@ -39,8 +39,8 @@ // //M*/ -#ifndef __OPENCV_CORE_SSE_UTILS_HPP__ -#define __OPENCV_CORE_SSE_UTILS_HPP__ +#ifndef OPENCV_CORE_SSE_UTILS_HPP +#define OPENCV_CORE_SSE_UTILS_HPP #ifndef __cplusplus # error sse_utils.hpp header must be compiled as C++ @@ -567,7 +567,7 @@ inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1) { - const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1); + enum { mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1) }; __m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo); __m128 layer2_chunk2 = _mm_shuffle_ps(v_r0, v_r1, mask_hi); @@ -588,7 +588,7 @@ inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m12 inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1, __m128 & v_b0, __m128 & v_b1) { - const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1); + enum { mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1) }; __m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo); __m128 layer2_chunk3 = _mm_shuffle_ps(v_r0, v_r1, mask_hi); @@ -615,7 +615,7 @@ inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1, __m128 & v_b0, __m128 & v_b1, __m128 & v_a0, __m128 & v_a1) { - const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1); + enum { mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1) }; __m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo); __m128 layer2_chunk4 = _mm_shuffle_ps(v_r0, v_r1, mask_hi); @@ -649,4 +649,4 @@ inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m12 //! @} -#endif //__OPENCV_CORE_SSE_UTILS_HPP__ +#endif //OPENCV_CORE_SSE_UTILS_HPP diff --git a/IPL/include/opencv/opencv2/core/traits.hpp b/IPL/include/opencv/opencv2/core/traits.hpp index 49bc844..52ab083 100644 --- a/IPL/include/opencv/opencv2/core/traits.hpp +++ b/IPL/include/opencv/opencv2/core/traits.hpp @@ -41,19 +41,23 @@ // //M*/ -#ifndef __OPENCV_CORE_TRAITS_HPP__ -#define __OPENCV_CORE_TRAITS_HPP__ +#ifndef OPENCV_CORE_TRAITS_HPP +#define OPENCV_CORE_TRAITS_HPP #include "opencv2/core/cvdef.h" namespace cv { +//#define OPENCV_TRAITS_ENABLE_DEPRECATED + //! @addtogroup core_basic //! @{ /** @brief Template "trait" class for OpenCV primitive data types. +@note Deprecated. This is replaced by "single purpose" traits: traits::Type and traits::Depth + A primitive OpenCV data type is one of unsigned char, bool, signed char, unsigned short, signed short, int, float, double, or a tuple of values of one of these types, where all the values in the tuple have the same type. Any primitive type from the list can be defined by an identifier in the @@ -102,10 +106,13 @@ So, such traits are used to tell OpenCV which data type you are working with, ev not native to OpenCV. For example, the matrix B initialization above is compiled because OpenCV defines the proper specialized template class DataType\ \> . This mechanism is also useful (and used in OpenCV this way) for generic algorithms implementations. + +@note Default values were dropped to stop confusing developers about using of unsupported types (see #7599) */ template class DataType { public: +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED typedef _Tp value_type; typedef value_type work_type; typedef value_type channel_type; @@ -116,6 +123,7 @@ template class DataType fmt = 0, type = CV_MAKETYPE(depth, channels) }; +#endif }; template<> class DataType @@ -253,6 +261,20 @@ template<> class DataType }; }; +template<> class DataType +{ +public: + typedef float16_t value_type; + typedef float work_type; + typedef value_type channel_type; + typedef value_type vec_type; + enum { generic_type = 0, + depth = CV_16F, + channels = 1, + fmt = (int)'h', + type = CV_MAKETYPE(depth, channels) + }; +}; /** @brief A helper class for cv::DataType @@ -270,11 +292,14 @@ template class DataDepth }; +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED template class TypeDepth { +#ifdef OPENCV_TRAITS_ENABLE_LEGACY_DEFAULTS enum { depth = CV_USRTYPE1 }; typedef void value_type; +#endif }; template<> class TypeDepth @@ -319,8 +344,74 @@ template<> class TypeDepth typedef double value_type; }; +template<> class TypeDepth +{ + enum { depth = CV_16F }; + typedef float16_t value_type; +}; + +#endif + //! @} +namespace traits { + +namespace internal { +#define CV_CREATE_MEMBER_CHECK(X) \ +template class CheckMember_##X { \ + struct Fallback { int X; }; \ + struct Derived : T, Fallback { }; \ + template struct Check; \ + typedef char CV_NO[1]; \ + typedef char CV_YES[2]; \ + template static CV_NO & func(Check *); \ + template static CV_YES & func(...); \ +public: \ + typedef CheckMember_##X type; \ + enum { value = sizeof(func(0)) == sizeof(CV_YES) }; \ +}; + +CV_CREATE_MEMBER_CHECK(fmt) +CV_CREATE_MEMBER_CHECK(type) + +} // namespace internal + + +template +struct Depth +{ enum { value = DataType::depth }; }; + +template +struct Type +{ enum { value = DataType::type }; }; + +/** Similar to traits::Type but has value = -1 in case of unknown type (instead of compiler error) */ +template >::value > +struct SafeType {}; + +template +struct SafeType +{ enum { value = -1 }; }; + +template +struct SafeType +{ enum { value = Type::value }; }; + + +template >::value > +struct SafeFmt {}; + +template +struct SafeFmt +{ enum { fmt = 0 }; }; + +template +struct SafeFmt +{ enum { fmt = DataType::fmt }; }; + + +} // namespace + } // cv -#endif // __OPENCV_CORE_TRAITS_HPP__ +#endif // OPENCV_CORE_TRAITS_HPP diff --git a/IPL/include/opencv/opencv2/core/types.hpp b/IPL/include/opencv/opencv2/core/types.hpp index e166556..819fd52 100644 --- a/IPL/include/opencv/opencv2/core/types.hpp +++ b/IPL/include/opencv/opencv2/core/types.hpp @@ -41,8 +41,8 @@ // //M*/ -#ifndef __OPENCV_CORE_TYPES_HPP__ -#define __OPENCV_CORE_TYPES_HPP__ +#ifndef OPENCV_CORE_TYPES_HPP +#define OPENCV_CORE_TYPES_HPP #ifndef __cplusplus # error types.hpp header must be compiled as C++ @@ -51,6 +51,7 @@ #include #include #include +#include #include "opencv2/core/cvdef.h" #include "opencv2/core/cvstd.hpp" @@ -74,7 +75,7 @@ template class Complex { public: - //! constructors + //! default constructor Complex(); Complex( _Tp _re, _Tp _im = 0 ); @@ -97,14 +98,23 @@ template class DataType< Complex<_Tp> > typedef _Tp channel_type; enum { generic_type = 0, - depth = DataType::depth, channels = 2, - fmt = DataType::fmt + ((channels - 1) << 8), - type = CV_MAKETYPE(depth, channels) }; + fmt = DataType::fmt + ((channels - 1) << 8) +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + ,depth = DataType::depth + ,type = CV_MAKETYPE(depth, channels) +#endif + }; typedef Vec vec_type; }; +namespace traits { +template +struct Depth< Complex<_Tp> > { enum { value = Depth<_Tp>::value }; }; +template +struct Type< Complex<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 2) }; }; +} // namespace //////////////////////////////// Point_ //////////////////////////////// @@ -149,14 +159,16 @@ template class Point_ public: typedef _Tp value_type; - // various constructors + //! default constructor Point_(); Point_(_Tp _x, _Tp _y); Point_(const Point_& pt); + Point_(Point_&& pt) CV_NOEXCEPT; Point_(const Size_<_Tp>& sz); Point_(const Vec<_Tp, 2>& v); Point_& operator = (const Point_& pt); + Point_& operator = (Point_&& pt) CV_NOEXCEPT; //! conversion to another data type template operator Point_<_Tp2>() const; @@ -171,11 +183,12 @@ template class Point_ double cross(const Point_& pt) const; //! checks whether the point is inside the specified rectangle bool inside(const Rect_<_Tp>& r) const; - - _Tp x, y; //< the point coordinates + _Tp x; //!< x coordinate of the point + _Tp y; //!< y coordinate of the point }; typedef Point_ Point2i; +typedef Point_ Point2l; typedef Point_ Point2f; typedef Point_ Point2d; typedef Point2i Point; @@ -188,15 +201,23 @@ template class DataType< Point_<_Tp> > typedef _Tp channel_type; enum { generic_type = 0, - depth = DataType::depth, channels = 2, - fmt = DataType::fmt + ((channels - 1) << 8), - type = CV_MAKETYPE(depth, channels) + fmt = traits::SafeFmt::fmt + ((channels - 1) << 8) +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + ,depth = DataType::depth + ,type = CV_MAKETYPE(depth, channels) +#endif }; typedef Vec vec_type; }; +namespace traits { +template +struct Depth< Point_<_Tp> > { enum { value = Depth<_Tp>::value }; }; +template +struct Type< Point_<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 2) }; }; +} // namespace //////////////////////////////// Point3_ //////////////////////////////// @@ -220,14 +241,16 @@ template class Point3_ public: typedef _Tp value_type; - // various constructors + //! default constructor Point3_(); Point3_(_Tp _x, _Tp _y, _Tp _z); Point3_(const Point3_& pt); + Point3_(Point3_&& pt) CV_NOEXCEPT; explicit Point3_(const Point_<_Tp>& pt); Point3_(const Vec<_Tp, 3>& v); Point3_& operator = (const Point3_& pt); + Point3_& operator = (Point3_&& pt) CV_NOEXCEPT; //! conversion to another data type template operator Point3_<_Tp2>() const; //! conversion to cv::Vec<> @@ -239,8 +262,9 @@ template class Point3_ double ddot(const Point3_& pt) const; //! cross product of the 2 3D points Point3_ cross(const Point3_& pt) const; - - _Tp x, y, z; //< the point coordinates + _Tp x; //!< x coordinate of the 3D point + _Tp y; //!< y coordinate of the 3D point + _Tp z; //!< z coordinate of the 3D point }; typedef Point3_ Point3i; @@ -255,16 +279,23 @@ template class DataType< Point3_<_Tp> > typedef _Tp channel_type; enum { generic_type = 0, - depth = DataType::depth, channels = 3, - fmt = DataType::fmt + ((channels - 1) << 8), - type = CV_MAKETYPE(depth, channels) + fmt = traits::SafeFmt::fmt + ((channels - 1) << 8) +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + ,depth = DataType::depth + ,type = CV_MAKETYPE(depth, channels) +#endif }; typedef Vec vec_type; }; - +namespace traits { +template +struct Depth< Point3_<_Tp> > { enum { value = Depth<_Tp>::value }; }; +template +struct Type< Point3_<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 3) }; }; +} // namespace //////////////////////////////// Size_ //////////////////////////////// @@ -286,23 +317,31 @@ template class Size_ public: typedef _Tp value_type; - //! various constructors + //! default constructor Size_(); Size_(_Tp _width, _Tp _height); Size_(const Size_& sz); + Size_(Size_&& sz) CV_NOEXCEPT; Size_(const Point_<_Tp>& pt); Size_& operator = (const Size_& sz); + Size_& operator = (Size_&& sz) CV_NOEXCEPT; //! the area (width*height) _Tp area() const; + //! aspect ratio (width/height) + double aspectRatio() const; + //! true if empty + bool empty() const; //! conversion of another data type. template operator Size_<_Tp2>() const; - _Tp width, height; // the width and the height + _Tp width; //!< the width + _Tp height; //!< the height }; typedef Size_ Size2i; +typedef Size_ Size2l; typedef Size_ Size2f; typedef Size_ Size2d; typedef Size2i Size; @@ -315,16 +354,23 @@ template class DataType< Size_<_Tp> > typedef _Tp channel_type; enum { generic_type = 0, - depth = DataType::depth, channels = 2, - fmt = DataType::fmt + ((channels - 1) << 8), - type = CV_MAKETYPE(depth, channels) + fmt = DataType::fmt + ((channels - 1) << 8) +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + ,depth = DataType::depth + ,type = CV_MAKETYPE(depth, channels) +#endif }; typedef Vec vec_type; }; - +namespace traits { +template +struct Depth< Size_<_Tp> > { enum { value = Depth<_Tp>::value }; }; +template +struct Type< Size_<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 2) }; }; +} // namespace //////////////////////////////// Rect_ //////////////////////////////// @@ -376,14 +422,16 @@ template class Rect_ public: typedef _Tp value_type; - //! various constructors + //! default constructor Rect_(); Rect_(_Tp _x, _Tp _y, _Tp _width, _Tp _height); Rect_(const Rect_& r); + Rect_(Rect_&& r) CV_NOEXCEPT; Rect_(const Point_<_Tp>& org, const Size_<_Tp>& sz); Rect_(const Point_<_Tp>& pt1, const Point_<_Tp>& pt2); Rect_& operator = ( const Rect_& r ); + Rect_& operator = ( Rect_&& r ) CV_NOEXCEPT; //! the top-left corner Point_<_Tp> tl() const; //! the bottom-right corner @@ -393,6 +441,8 @@ template class Rect_ Size_<_Tp> size() const; //! area (width*height) of the rectangle _Tp area() const; + //! true if empty + bool empty() const; //! conversion to another data type template operator Rect_<_Tp2>() const; @@ -400,7 +450,10 @@ template class Rect_ //! checks whether the rectangle contains the point bool contains(const Point_<_Tp>& pt) const; - _Tp x, y, width, height; //< the top-left corner, as well as width and height of the rectangle + _Tp x; //!< x coordinate of the top-left corner + _Tp y; //!< y coordinate of the top-left corner + _Tp width; //!< width of the rectangle + _Tp height; //!< height of the rectangle }; typedef Rect_ Rect2i; @@ -416,40 +469,33 @@ template class DataType< Rect_<_Tp> > typedef _Tp channel_type; enum { generic_type = 0, - depth = DataType::depth, channels = 4, - fmt = DataType::fmt + ((channels - 1) << 8), - type = CV_MAKETYPE(depth, channels) + fmt = traits::SafeFmt::fmt + ((channels - 1) << 8) +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + ,depth = DataType::depth + ,type = CV_MAKETYPE(depth, channels) +#endif }; typedef Vec vec_type; }; - +namespace traits { +template +struct Depth< Rect_<_Tp> > { enum { value = Depth<_Tp>::value }; }; +template +struct Type< Rect_<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 4) }; }; +} // namespace ///////////////////////////// RotatedRect ///////////////////////////// /** @brief The class represents rotated (i.e. not up-right) rectangles on a plane. Each rectangle is specified by the center point (mass center), length of each side (represented by -cv::Size2f structure) and the rotation angle in degrees. +#Size2f structure) and the rotation angle in degrees. The sample below demonstrates how to use RotatedRect: -@code - Mat image(200, 200, CV_8UC3, Scalar(0)); - RotatedRect rRect = RotatedRect(Point2f(100,100), Size2f(100,50), 30); - - Point2f vertices[4]; - rRect.points(vertices); - for (int i = 0; i < 4; i++) - line(image, vertices[i], vertices[(i+1)%4], Scalar(0,255,0)); - - Rect brect = rRect.boundingRect(); - rectangle(image, brect, Scalar(255,0,0)); - - imshow("rectangles", image); - waitKey(0); -@endcode +@snippet snippets/core_various.cpp RotatedRect_demo ![image](pics/rotatedrect.png) @sa CamShift, fitEllipse, minAreaRect, CvBox2D @@ -457,9 +503,9 @@ The sample below demonstrates how to use RotatedRect: class CV_EXPORTS RotatedRect { public: - //! various constructors + //! default constructor RotatedRect(); - /** + /** full constructor @param center The rectangle mass center. @param size Width and height of the rectangle. @param angle The rotation angle in a clockwise direction. When the angle is 0, 90, 180, 270 etc., @@ -473,15 +519,19 @@ class CV_EXPORTS RotatedRect RotatedRect(const Point2f& point1, const Point2f& point2, const Point2f& point3); /** returns 4 vertices of the rectangle - @param pts The points array for storing rectangle vertices. + @param pts The points array for storing rectangle vertices. The order is bottomLeft, topLeft, topRight, bottomRight. */ void points(Point2f pts[]) const; - //! returns the minimal up-right rectangle containing the rotated rectangle + //! returns the minimal up-right integer rectangle containing the rotated rectangle Rect boundingRect() const; - - Point2f center; //< the rectangle mass center - Size2f size; //< width and height of the rectangle - float angle; //< the rotation angle. When the angle is 0, 90, 180, 270 etc., the rectangle becomes an up-right rectangle. + //! returns the minimal (exact) floating point rectangle containing the rotated rectangle, not intended for use with images + Rect_ boundingRect2f() const; + //! returns the rectangle mass center + Point2f center; + //! returns width and height of the rectangle + Size2f size; + //! returns the rotation angle. When the angle is 0, 90, 180, 270 etc., the rectangle becomes an up-right rectangle. + float angle; }; template<> class DataType< RotatedRect > @@ -492,15 +542,23 @@ template<> class DataType< RotatedRect > typedef float channel_type; enum { generic_type = 0, - depth = DataType::depth, channels = (int)sizeof(value_type)/sizeof(channel_type), // 5 - fmt = DataType::fmt + ((channels - 1) << 8), - type = CV_MAKETYPE(depth, channels) + fmt = traits::SafeFmt::fmt + ((channels - 1) << 8) +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + ,depth = DataType::depth + ,type = CV_MAKETYPE(depth, channels) +#endif }; typedef Vec vec_type; }; +namespace traits { +template<> +struct Depth< RotatedRect > { enum { value = Depth::value }; }; +template<> +struct Type< RotatedRect > { enum { value = CV_MAKETYPE(Depth::value, (int)sizeof(RotatedRect)/sizeof(float)) }; }; +} // namespace //////////////////////////////// Range ///////////////////////////////// @@ -548,33 +606,47 @@ template<> class DataType typedef int channel_type; enum { generic_type = 0, - depth = DataType::depth, channels = 2, - fmt = DataType::fmt + ((channels - 1) << 8), - type = CV_MAKETYPE(depth, channels) + fmt = traits::SafeFmt::fmt + ((channels - 1) << 8) +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + ,depth = DataType::depth + ,type = CV_MAKETYPE(depth, channels) +#endif }; typedef Vec vec_type; }; +namespace traits { +template<> +struct Depth< Range > { enum { value = Depth::value }; }; +template<> +struct Type< Range > { enum { value = CV_MAKETYPE(Depth::value, 2) }; }; +} // namespace //////////////////////////////// Scalar_ /////////////////////////////// /** @brief Template class for a 4-element vector derived from Vec. -Being derived from Vec\<_Tp, 4\> , Scalar_ and Scalar can be used just as typical 4-element +Being derived from Vec\<_Tp, 4\> , Scalar\_ and Scalar can be used just as typical 4-element vectors. In addition, they can be converted to/from CvScalar . The type Scalar is widely used in OpenCV to pass pixel values. */ template class Scalar_ : public Vec<_Tp, 4> { public: - //! various constructors + //! default constructor Scalar_(); Scalar_(_Tp v0, _Tp v1, _Tp v2=0, _Tp v3=0); Scalar_(_Tp v0); + Scalar_(const Scalar_& s); + Scalar_(Scalar_&& s) CV_NOEXCEPT; + + Scalar_& operator=(const Scalar_& s); + Scalar_& operator=(Scalar_&& s) CV_NOEXCEPT; + template Scalar_(const Vec<_Tp2, cn>& v); @@ -587,10 +659,10 @@ template class Scalar_ : public Vec<_Tp, 4> //! per-element product Scalar_<_Tp> mul(const Scalar_<_Tp>& a, double scale=1 ) const; - // returns (v0, -v1, -v2, -v3) + //! returns (v0, -v1, -v2, -v3) Scalar_<_Tp> conj() const; - // returns true iff v1 == v2 == v3 == 0 + //! returns true iff v1 == v2 == v3 == 0 bool isReal() const; }; @@ -604,15 +676,23 @@ template class DataType< Scalar_<_Tp> > typedef _Tp channel_type; enum { generic_type = 0, - depth = DataType::depth, channels = 4, - fmt = DataType::fmt + ((channels - 1) << 8), - type = CV_MAKETYPE(depth, channels) + fmt = traits::SafeFmt::fmt + ((channels - 1) << 8) +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + ,depth = DataType::depth + ,type = CV_MAKETYPE(depth, channels) +#endif }; typedef Vec vec_type; }; +namespace traits { +template +struct Depth< Scalar_<_Tp> > { enum { value = Depth<_Tp>::value }; }; +template +struct Type< Scalar_<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 4) }; }; +} // namespace /////////////////////////////// KeyPoint //////////////////////////////// @@ -620,14 +700,13 @@ template class DataType< Scalar_<_Tp> > /** @brief Data structure for salient point detectors. The class instance stores a keypoint, i.e. a point feature found by one of many available keypoint -detectors, such as Harris corner detector, cv::FAST, cv::StarDetector, cv::SURF, cv::SIFT, -cv::LDetector etc. +detectors, such as Harris corner detector, #FAST, %StarDetector, %SURF, %SIFT etc. The keypoint is characterized by the 2D position, scale (proportional to the diameter of the neighborhood that needs to be taken into account), orientation and some other parameters. The keypoint neighborhood is then analyzed by another algorithm that builds a descriptor (usually represented as a feature vector). The keypoints representing the same object in different images -can then be matched using cv::KDTree or another method. +can then be matched using %KDTree or another method. */ class CV_EXPORTS_W_SIMPLE KeyPoint { @@ -699,6 +778,7 @@ class CV_EXPORTS_W_SIMPLE KeyPoint CV_PROP_RW int class_id; //!< object class (if the keypoints need to be clustered by an object they belong to) }; +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED template<> class DataType { public: @@ -715,7 +795,7 @@ template<> class DataType typedef Vec vec_type; }; - +#endif //////////////////////////////// DMatch ///////////////////////////////// @@ -732,9 +812,9 @@ class CV_EXPORTS_W_SIMPLE DMatch CV_WRAP DMatch(int _queryIdx, int _trainIdx, float _distance); CV_WRAP DMatch(int _queryIdx, int _trainIdx, int _imgIdx, float _distance); - CV_PROP_RW int queryIdx; // query descriptor index - CV_PROP_RW int trainIdx; // train descriptor index - CV_PROP_RW int imgIdx; // train image index + CV_PROP_RW int queryIdx; //!< query descriptor index + CV_PROP_RW int trainIdx; //!< train descriptor index + CV_PROP_RW int imgIdx; //!< train image index CV_PROP_RW float distance; @@ -742,6 +822,7 @@ class CV_EXPORTS_W_SIMPLE DMatch bool operator<(const DMatch &m) const; }; +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED template<> class DataType { public: @@ -758,7 +839,7 @@ template<> class DataType typedef Vec vec_type; }; - +#endif ///////////////////////////// TermCriteria ////////////////////////////// @@ -790,9 +871,16 @@ class CV_EXPORTS TermCriteria */ TermCriteria(int type, int maxCount, double epsilon); + inline bool isValid() const + { + const bool isCount = (type & COUNT) && maxCount > 0; + const bool isEps = (type & EPS) && !cvIsNaN(epsilon); + return isCount || isEps; + } + int type; //!< the type of termination criteria: COUNT, EPS or COUNT + EPS - int maxCount; // the maximum number of iterations/elements - double epsilon; // the desired accuracy + int maxCount; //!< the maximum number of iterations/elements + double epsilon; //!< the desired accuracy }; @@ -872,15 +960,24 @@ template<> class DataType typedef double channel_type; enum { generic_type = 0, - depth = DataType::depth, channels = (int)(sizeof(value_type)/sizeof(channel_type)), // 24 - fmt = DataType::fmt + ((channels - 1) << 8), - type = CV_MAKETYPE(depth, channels) + fmt = DataType::fmt + ((channels - 1) << 8) +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + ,depth = DataType::depth + ,type = CV_MAKETYPE(depth, channels) +#endif }; typedef Vec vec_type; }; +namespace traits { +template<> +struct Depth< Moments > { enum { value = Depth::value }; }; +template<> +struct Type< Moments > { enum { value = CV_MAKETYPE(Depth::value, (int)(sizeof(Moments)/sizeof(double))) }; }; +} // namespace + //! @} imgproc_shape //! @cond IGNORED @@ -1031,7 +1128,8 @@ Complex<_Tp> operator / (const Complex<_Tp>& a, const Complex<_Tp>& b) template static inline Complex<_Tp>& operator /= (Complex<_Tp>& a, const Complex<_Tp>& b) { - return (a = a / b); + a = a / b; + return a; } template static inline @@ -1070,6 +1168,10 @@ template inline Point_<_Tp>::Point_(const Point_& pt) : x(pt.x), y(pt.y) {} +template inline +Point_<_Tp>::Point_(Point_&& pt) CV_NOEXCEPT + : x(std::move(pt.x)), y(std::move(pt.y)) {} + template inline Point_<_Tp>::Point_(const Size_<_Tp>& sz) : x(sz.width), y(sz.height) {} @@ -1085,6 +1187,13 @@ Point_<_Tp>& Point_<_Tp>::operator = (const Point_& pt) return *this; } +template inline +Point_<_Tp>& Point_<_Tp>::operator = (Point_&& pt) CV_NOEXCEPT +{ + x = std::move(pt.x); y = std::move(pt.y); + return *this; +} + template template inline Point_<_Tp>::operator Point_<_Tp2>() const { @@ -1106,7 +1215,7 @@ _Tp Point_<_Tp>::dot(const Point_& pt) const template inline double Point_<_Tp>::ddot(const Point_& pt) const { - return (double)x*pt.x + (double)y*pt.y; + return (double)x*(double)(pt.x) + (double)y*(double)(pt.y); } template inline @@ -1297,6 +1406,20 @@ Point_<_Tp> operator / (const Point_<_Tp>& a, double b) } +template static inline _AccTp normL2Sqr(const Point_& pt); +template static inline _AccTp normL2Sqr(const Point_& pt); +template static inline _AccTp normL2Sqr(const Point_& pt); +template static inline _AccTp normL2Sqr(const Point_& pt); + +template<> inline int normL2Sqr(const Point_& pt) { return pt.dot(pt); } +template<> inline int64 normL2Sqr(const Point_& pt) { return pt.dot(pt); } +template<> inline float normL2Sqr(const Point_& pt) { return pt.dot(pt); } +template<> inline double normL2Sqr(const Point_& pt) { return pt.dot(pt); } + +template<> inline double normL2Sqr(const Point_& pt) { return pt.ddot(pt); } +template<> inline double normL2Sqr(const Point_& pt) { return pt.ddot(pt); } + + //////////////////////////////// 3D Point /////////////////////////////// @@ -1312,6 +1435,10 @@ template inline Point3_<_Tp>::Point3_(const Point3_& pt) : x(pt.x), y(pt.y), z(pt.z) {} +template inline +Point3_<_Tp>::Point3_(Point3_&& pt) CV_NOEXCEPT + : x(std::move(pt.x)), y(std::move(pt.y)), z(std::move(pt.z)) {} + template inline Point3_<_Tp>::Point3_(const Point_<_Tp>& pt) : x(pt.x), y(pt.y), z(_Tp()) {} @@ -1339,6 +1466,13 @@ Point3_<_Tp>& Point3_<_Tp>::operator = (const Point3_& pt) return *this; } +template inline +Point3_<_Tp>& Point3_<_Tp>::operator = (Point3_&& pt) CV_NOEXCEPT +{ + x = std::move(pt.x); y = std::move(pt.y); z = std::move(pt.z); + return *this; +} + template inline _Tp Point3_<_Tp>::dot(const Point3_& pt) const { @@ -1555,6 +1689,10 @@ template inline Size_<_Tp>::Size_(const Size_& sz) : width(sz.width), height(sz.height) {} +template inline +Size_<_Tp>::Size_(Size_&& sz) CV_NOEXCEPT + : width(std::move(sz.width)), height(std::move(sz.height)) {} + template inline Size_<_Tp>::Size_(const Point_<_Tp>& pt) : width(pt.x), height(pt.y) {} @@ -1572,12 +1710,35 @@ Size_<_Tp>& Size_<_Tp>::operator = (const Size_<_Tp>& sz) return *this; } +template inline +Size_<_Tp>& Size_<_Tp>::operator = (Size_<_Tp>&& sz) CV_NOEXCEPT +{ + width = std::move(sz.width); height = std::move(sz.height); + return *this; +} + template inline _Tp Size_<_Tp>::area() const { - return width * height; + const _Tp result = width * height; + CV_DbgAssert(!std::numeric_limits<_Tp>::is_integer + || width == 0 || result / width == height); // make sure the result fits in the return value + return result; +} + +template inline +double Size_<_Tp>::aspectRatio() const +{ + return width / static_cast(height); +} + +template inline +bool Size_<_Tp>::empty() const +{ + return width <= 0 || height <= 0; } + template static inline Size_<_Tp>& operator *= (Size_<_Tp>& a, _Tp b) { @@ -1670,6 +1831,10 @@ template inline Rect_<_Tp>::Rect_(const Rect_<_Tp>& r) : x(r.x), y(r.y), width(r.width), height(r.height) {} +template inline +Rect_<_Tp>::Rect_(Rect_<_Tp>&& r) CV_NOEXCEPT + : x(std::move(r.x)), y(std::move(r.y)), width(std::move(r.width)), height(std::move(r.height)) {} + template inline Rect_<_Tp>::Rect_(const Point_<_Tp>& org, const Size_<_Tp>& sz) : x(org.x), y(org.y), width(sz.width), height(sz.height) {} @@ -1693,6 +1858,16 @@ Rect_<_Tp>& Rect_<_Tp>::operator = ( const Rect_<_Tp>& r ) return *this; } +template inline +Rect_<_Tp>& Rect_<_Tp>::operator = ( Rect_<_Tp>&& r ) CV_NOEXCEPT +{ + x = std::move(r.x); + y = std::move(r.y); + width = std::move(r.width); + height = std::move(r.height); + return *this; +} + template inline Point_<_Tp> Rect_<_Tp>::tl() const { @@ -1714,7 +1889,16 @@ Size_<_Tp> Rect_<_Tp>::size() const template inline _Tp Rect_<_Tp>::area() const { - return width * height; + const _Tp result = width * height; + CV_DbgAssert(!std::numeric_limits<_Tp>::is_integer + || width == 0 || result / width == height); // make sure the result fits in the return value + return result; +} + +template inline +bool Rect_<_Tp>::empty() const +{ + return width <= 0 || height <= 0; } template template inline @@ -1757,8 +1941,11 @@ Rect_<_Tp>& operator += ( Rect_<_Tp>& a, const Size_<_Tp>& b ) template static inline Rect_<_Tp>& operator -= ( Rect_<_Tp>& a, const Size_<_Tp>& b ) { - a.width -= b.width; - a.height -= b.height; + const _Tp width = a.width - b.width; + const _Tp height = a.height - b.height; + CV_DbgAssert(width >= 0 && height >= 0); + a.width = width; + a.height = height; return a; } @@ -1779,12 +1966,17 @@ Rect_<_Tp>& operator &= ( Rect_<_Tp>& a, const Rect_<_Tp>& b ) template static inline Rect_<_Tp>& operator |= ( Rect_<_Tp>& a, const Rect_<_Tp>& b ) { - _Tp x1 = std::min(a.x, b.x); - _Tp y1 = std::min(a.y, b.y); - a.width = std::max(a.x + a.width, b.x + b.width) - x1; - a.height = std::max(a.y + a.height, b.y + b.height) - y1; - a.x = x1; - a.y = y1; + if (a.empty()) { + a = b; + } + else if (!b.empty()) { + _Tp x1 = std::min(a.x, b.x); + _Tp y1 = std::min(a.y, b.y); + a.width = std::max(a.x + a.width, b.x + b.width) - x1; + a.height = std::max(a.y + a.height, b.y + b.height) - y1; + a.x = x1; + a.y = y1; + } return a; } @@ -1818,6 +2010,15 @@ Rect_<_Tp> operator + (const Rect_<_Tp>& a, const Size_<_Tp>& b) return Rect_<_Tp>( a.x, a.y, a.width + b.width, a.height + b.height ); } +template static inline +Rect_<_Tp> operator - (const Rect_<_Tp>& a, const Size_<_Tp>& b) +{ + const _Tp width = a.width - b.width; + const _Tp height = a.height - b.height; + CV_DbgAssert(width >= 0 && height >= 0); + return Rect_<_Tp>( a.x, a.y, width, height ); +} + template static inline Rect_<_Tp> operator & (const Rect_<_Tp>& a, const Rect_<_Tp>& b) { @@ -1832,7 +2033,26 @@ Rect_<_Tp> operator | (const Rect_<_Tp>& a, const Rect_<_Tp>& b) return c |= b; } +/** + * @brief measure dissimilarity between two sample sets + * + * computes the complement of the Jaccard Index as described in . + * For rectangles this reduces to computing the intersection over the union. + */ +template static inline +double jaccardDistance(const Rect_<_Tp>& a, const Rect_<_Tp>& b) { + _Tp Aa = a.area(); + _Tp Ab = b.area(); + + if ((Aa + Ab) <= std::numeric_limits<_Tp>::epsilon()) { + // jaccard_index = 1 -> distance = 0 + return 0.0; + } + double Aab = (a & b).area(); + // distance = 1 - jaccard_index + return 1.0 - Aab / (Aa + Ab - Aab); +} ////////////////////////////// RotatedRect ////////////////////////////// @@ -1844,8 +2064,6 @@ inline RotatedRect::RotatedRect(const Point2f& _center, const Size2f& _size, float _angle) : center(_center), size(_size), angle(_angle) {} - - ///////////////////////////////// Range ///////////////////////////////// inline @@ -1945,6 +2163,36 @@ Scalar_<_Tp>::Scalar_(_Tp v0, _Tp v1, _Tp v2, _Tp v3) this->val[3] = v3; } +template inline +Scalar_<_Tp>::Scalar_(const Scalar_<_Tp>& s) : Vec<_Tp, 4>(s) { +} + +template inline +Scalar_<_Tp>::Scalar_(Scalar_<_Tp>&& s) CV_NOEXCEPT { + this->val[0] = std::move(s.val[0]); + this->val[1] = std::move(s.val[1]); + this->val[2] = std::move(s.val[2]); + this->val[3] = std::move(s.val[3]); +} + +template inline +Scalar_<_Tp>& Scalar_<_Tp>::operator=(const Scalar_<_Tp>& s) { + this->val[0] = s.val[0]; + this->val[1] = s.val[1]; + this->val[2] = s.val[2]; + this->val[3] = s.val[3]; + return *this; +} + +template inline +Scalar_<_Tp>& Scalar_<_Tp>::operator=(Scalar_<_Tp>&& s) CV_NOEXCEPT { + this->val[0] = std::move(s.val[0]); + this->val[1] = std::move(s.val[1]); + this->val[2] = std::move(s.val[2]); + this->val[3] = std::move(s.val[3]); + return *this; +} + template template inline Scalar_<_Tp>::Scalar_(const Vec<_Tp2, cn>& v) { @@ -2225,4 +2473,4 @@ TermCriteria::TermCriteria(int _type, int _maxCount, double _epsilon) } // cv -#endif //__OPENCV_CORE_TYPES_HPP__ +#endif //OPENCV_CORE_TYPES_HPP diff --git a/IPL/include/opencv/opencv2/core/types_c.h b/IPL/include/opencv/opencv2/core/types_c.h index cb39587..e453626 100644 --- a/IPL/include/opencv/opencv2/core/types_c.h +++ b/IPL/include/opencv/opencv2/core/types_c.h @@ -41,12 +41,37 @@ // //M*/ -#ifndef __OPENCV_CORE_TYPES_H__ -#define __OPENCV_CORE_TYPES_H__ +#ifndef OPENCV_CORE_TYPES_H +#define OPENCV_CORE_TYPES_H + +#ifdef CV__ENABLE_C_API_CTORS // invalid C API ctors (must be removed) +#if defined(_WIN32) && !defined(CV__SKIP_MESSAGE_MALFORMED_C_API_CTORS) +#error "C API ctors don't work on Win32: https://github.com/opencv/opencv/issues/15990" +#endif +#endif + +//#define CV__VALIDATE_UNUNITIALIZED_VARS 1 // C++11 & GCC only + +#ifdef __cplusplus + +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#define CV_STRUCT_INITIALIZER {0,} +#else +#if defined(__GNUC__) && __GNUC__ == 4 // GCC 4.x warns on "= {}" initialization, fixed in GCC 5.0 +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#endif +#define CV_STRUCT_INITIALIZER {} +#endif + +#else +#define CV_STRUCT_INITIALIZER {0} +#endif + #ifdef HAVE_IPL # ifndef __IPL_H__ -# if defined WIN32 || defined _WIN32 +# if defined _WIN32 # include # else # include @@ -65,7 +90,7 @@ #include #endif // SKIP_INCLUDES -#if defined WIN32 || defined _WIN32 +#if defined _WIN32 # define CV_CDECL __cdecl # define CV_STDCALL __stdcall #else @@ -130,24 +155,24 @@ enum { CV_BadImageSize= -10, /**< image size is invalid */ CV_BadOffset= -11, /**< offset is invalid */ CV_BadDataPtr= -12, /**/ - CV_BadStep= -13, /**/ + CV_BadStep= -13, /**< image step is wrong, this may happen for a non-continuous matrix */ CV_BadModelOrChSeq= -14, /**/ - CV_BadNumChannels= -15, /**/ + CV_BadNumChannels= -15, /**< bad number of channels, for example, some functions accept only single channel matrices */ CV_BadNumChannel1U= -16, /**/ - CV_BadDepth= -17, /**/ + CV_BadDepth= -17, /**< input image depth is not supported by the function */ CV_BadAlphaChannel= -18, /**/ - CV_BadOrder= -19, /**/ - CV_BadOrigin= -20, /**/ - CV_BadAlign= -21, /**/ + CV_BadOrder= -19, /**< number of dimensions is out of range */ + CV_BadOrigin= -20, /**< incorrect input origin */ + CV_BadAlign= -21, /**< incorrect input align */ CV_BadCallBack= -22, /**/ CV_BadTileSize= -23, /**/ - CV_BadCOI= -24, /**/ - CV_BadROISize= -25, /**/ + CV_BadCOI= -24, /**< input COI is not supported */ + CV_BadROISize= -25, /**< incorrect input roi */ CV_MaskIsTiled= -26, /**/ CV_StsNullPtr= -27, /**< null pointer */ CV_StsVecLengthErr= -28, /**< incorrect vector length */ - CV_StsFilterStructContentErr= -29, /**< incorr. filter structure content */ - CV_StsKernelStructContentErr= -30, /**< incorr. transform kernel content */ + CV_StsFilterStructContentErr= -29, /**< incorrect filter structure content */ + CV_StsKernelStructContentErr= -30, /**< incorrect transform kernel content */ CV_StsFilterOffsetErr= -31, /**< incorrect filter offset value */ CV_StsBadSize= -201, /**< the input/output structure size is incorrect */ CV_StsDivByZero= -202, /**< division by zero */ @@ -163,14 +188,14 @@ enum { CV_StsParseError= -212, /**< invalid syntax/structure of the parsed file */ CV_StsNotImplemented= -213, /**< the requested function/feature is not implemented */ CV_StsBadMemBlock= -214, /**< an allocated block has been corrupted */ - CV_StsAssert= -215, /**< assertion failed */ - CV_GpuNotSupported= -216, - CV_GpuApiCallError= -217, - CV_OpenGlNotSupported= -218, - CV_OpenGlApiCallError= -219, - CV_OpenCLApiCallError= -220, + CV_StsAssert= -215, /**< assertion failed */ + CV_GpuNotSupported= -216, /**< no CUDA support */ + CV_GpuApiCallError= -217, /**< GPU API call error */ + CV_OpenGlNotSupported= -218, /**< no OpenGL support */ + CV_OpenGlApiCallError= -219, /**< OpenGL API call error */ + CV_OpenCLApiCallError= -220, /**< OpenCL API call error */ CV_OpenCLDoubleNotSupported= -221, - CV_OpenCLInitError= -222, + CV_OpenCLInitError= -222, /**< OpenCL initialization error */ CV_OpenCLNoAMDBlasFft= -223 }; @@ -285,6 +310,11 @@ CV_INLINE double cvRandReal( CvRNG* rng ) #define IPL_BORDER_REFLECT 2 #define IPL_BORDER_WRAP 3 +#ifdef __cplusplus +typedef struct _IplImage IplImage; +CV_EXPORTS _IplImage cvIplImage(const cv::Mat& m); +#endif + /** The IplImage is taken from the Intel Image Processing Library, in which the format is native. OpenCV only supports a subset of possible IplImage formats, as outlined in the parameter list above. @@ -294,9 +324,6 @@ hand, the Intel Image Processing Library processes the area of intersection betw destination images (or ROIs), allowing them to vary independently. */ typedef struct -#ifdef __cplusplus - CV_EXPORTS -#endif _IplImage { int nSize; /**< sizeof(IplImage) */ @@ -330,13 +357,22 @@ _IplImage (not necessarily aligned) - needed for correct deallocation */ -#ifdef __cplusplus +#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) _IplImage() {} - _IplImage(const cv::Mat& m); + _IplImage(const cv::Mat& m) { *this = cvIplImage(m); } #endif } IplImage; +CV_INLINE IplImage cvIplImage() +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + IplImage self = CV_STRUCT_INITIALIZER; self.nSize = sizeof(IplImage); return self; +#else + return _IplImage(); +#endif +} + typedef struct _IplTileInfo IplTileInfo; typedef struct _IplROI @@ -409,6 +445,11 @@ IplConvKernelFP; #define CV_MAT_MAGIC_VAL 0x42420000 #define CV_TYPE_NAME_MAT "opencv-matrix" +#ifdef __cplusplus +typedef struct CvMat CvMat; +CV_INLINE CvMat cvMat(const cv::Mat& m); +#endif + /** Matrix elements are stored row by row. Element (i, j) (i - 0-based row index, j - 0-based column index) of a matrix can be retrieved or modified using CV_MAT_ELEM macro: @@ -455,13 +496,10 @@ typedef struct CvMat int cols; #endif - -#ifdef __cplusplus +#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) CvMat() {} - CvMat(const CvMat& m) { memcpy(this, &m, sizeof(CvMat));} - CvMat(const cv::Mat& m); + CvMat(const cv::Mat& m) { *this = cvMat(m); } #endif - } CvMat; @@ -524,14 +562,34 @@ CV_INLINE CvMat cvMat( int rows, int cols, int type, void* data CV_DEFAULT(NULL) } #ifdef __cplusplus -inline CvMat::CvMat(const cv::Mat& m) + +CV_INLINE CvMat cvMat(const cv::Mat& m) { + CvMat self; CV_DbgAssert(m.dims <= 2); - *this = cvMat(m.rows, m.dims == 1 ? 1 : m.cols, m.type(), m.data); - step = (int)m.step[0]; - type = (type & ~cv::Mat::CONTINUOUS_FLAG) | (m.flags & cv::Mat::CONTINUOUS_FLAG); + self = cvMat(m.rows, m.dims == 1 ? 1 : m.cols, m.type(), m.data); + self.step = (int)m.step[0]; + self.type = (self.type & ~cv::Mat::CONTINUOUS_FLAG) | (m.flags & cv::Mat::CONTINUOUS_FLAG); + return self; +} +CV_INLINE CvMat cvMat() +{ +#if !defined(CV__ENABLE_C_API_CTORS) + CvMat self = CV_STRUCT_INITIALIZER; return self; +#else + return CvMat(); +#endif } +CV_INLINE CvMat cvMat(const CvMat& m) +{ +#if !defined(CV__ENABLE_C_API_CTORS) + CvMat self = CV_STRUCT_INITIALIZER; memcpy(&self, &m, sizeof(self)); return self; +#else + return CvMat(m); #endif +} + +#endif // __cplusplus #define CV_MAT_ELEM_PTR_FAST( mat, row, col, pix_size ) \ @@ -614,15 +672,16 @@ CV_INLINE int cvIplDepth( int type ) #define CV_TYPE_NAME_MATND "opencv-nd-matrix" #define CV_MAX_DIM 32 -#define CV_MAX_DIM_HEAP 1024 + +#ifdef __cplusplus +typedef struct CvMatND CvMatND; +CV_EXPORTS CvMatND cvMatND(const cv::Mat& m); +#endif /** @deprecated consider using cv::Mat instead */ typedef struct -#ifdef __cplusplus - CV_EXPORTS -#endif CvMatND { int type; @@ -647,13 +706,23 @@ CvMatND } dim[CV_MAX_DIM]; -#ifdef __cplusplus +#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) CvMatND() {} - CvMatND(const cv::Mat& m); + CvMatND(const cv::Mat& m) { *this = cvMatND(m); } #endif } CvMatND; + +CV_INLINE CvMatND cvMatND() +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvMatND self = CV_STRUCT_INITIALIZER; return self; +#else + return CvMatND(); +#endif +} + #define CV_IS_MATND_HDR(mat) \ ((mat) != NULL && (((const CvMatND*)(mat))->type & CV_MAGIC_MASK) == CV_MATND_MAGIC_VAL) @@ -670,11 +739,7 @@ CvMatND; struct CvSet; -typedef struct -#ifdef __cplusplus - CV_EXPORTS -#endif -CvSparseMat +typedef struct CvSparseMat { int type; int dims; @@ -689,13 +754,13 @@ CvSparseMat int size[CV_MAX_DIM]; #ifdef __cplusplus - void copyToSparseMat(cv::SparseMat& m) const; + CV_EXPORTS void copyToSparseMat(cv::SparseMat& m) const; #endif } CvSparseMat; #ifdef __cplusplus - CV_EXPORTS CvSparseMat* cvCreateSparseMat(const cv::SparseMat& m); +CV_EXPORTS CvSparseMat* cvCreateSparseMat(const cv::SparseMat& m); #endif #define CV_IS_SPARSE_MAT_HDR(mat) \ @@ -782,10 +847,23 @@ typedef struct CvRect int width; int height; -#ifdef __cplusplus +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvRect() __attribute__(( warning("Non-initialized variable") )) {}; + template CvRect(const std::initializer_list<_Tp> list) + { + CV_Assert(list.size() == 0 || list.size() == 4); + x = y = width = height = 0; + if (list.size() == 4) + { + x = list.begin()[0]; y = list.begin()[1]; width = list.begin()[2]; height = list.begin()[3]; + } + }; +#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) CvRect(int _x = 0, int _y = 0, int w = 0, int h = 0): x(_x), y(_y), width(w), height(h) {} template CvRect(const cv::Rect_<_Tp>& r): x(cv::saturate_cast(r.x)), y(cv::saturate_cast(r.y)), width(cv::saturate_cast(r.width)), height(cv::saturate_cast(r.height)) {} +#endif +#ifdef __cplusplus template operator cv::Rect_<_Tp>() const { return cv::Rect_<_Tp>((_Tp)x, (_Tp)y, (_Tp)width, (_Tp)height); } #endif @@ -795,16 +873,16 @@ CvRect; /** constructs CvRect structure. */ CV_INLINE CvRect cvRect( int x, int y, int width, int height ) { - CvRect r; - - r.x = x; - r.y = y; - r.width = width; - r.height = height; - +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvRect r = {x, y, width, height}; +#else + CvRect r(x, y , width, height); +#endif return r; } - +#ifdef __cplusplus +CV_INLINE CvRect cvRect(const cv::Rect& rc) { return cvRect(rc.x, rc.y, rc.width, rc.height); } +#endif CV_INLINE IplROI cvRectToROI( CvRect rect, int coi ) { @@ -826,39 +904,41 @@ CV_INLINE CvRect cvROIToRect( IplROI roi ) /*********************************** CvTermCriteria *************************************/ -#define CV_TERMCRIT_ITER 1 -#define CV_TERMCRIT_NUMBER CV_TERMCRIT_ITER -#define CV_TERMCRIT_EPS 2 +#define cv::TermCriteria::MAX_ITER 1 +#define CV_TERMCRIT_NUMBER cv::TermCriteria::MAX_ITER +#define cv::TermCriteria::EPS 2 /** @sa TermCriteria */ typedef struct CvTermCriteria { int type; /**< may be combination of - CV_TERMCRIT_ITER - CV_TERMCRIT_EPS */ + cv::TermCriteria::MAX_ITER + cv::TermCriteria::EPS */ int max_iter; double epsilon; - -#ifdef __cplusplus +#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) CvTermCriteria(int _type = 0, int _iter = 0, double _eps = 0) : type(_type), max_iter(_iter), epsilon(_eps) {} CvTermCriteria(const cv::TermCriteria& t) : type(t.type), max_iter(t.maxCount), epsilon(t.epsilon) {} +#endif +#ifdef __cplusplus operator cv::TermCriteria() const { return cv::TermCriteria(type, max_iter, epsilon); } #endif - } CvTermCriteria; CV_INLINE CvTermCriteria cvTermCriteria( int type, int max_iter, double epsilon ) { - CvTermCriteria t; - - t.type = type; - t.max_iter = max_iter; - t.epsilon = (float)epsilon; - +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvTermCriteria t = { type, max_iter, (float)epsilon}; +#else + CvTermCriteria t(type, max_iter, epsilon); +#endif return t; } +#ifdef __cplusplus +CV_INLINE CvTermCriteria cvTermCriteria(const cv::TermCriteria& t) { return cvTermCriteria(t.type, t.maxCount, t.epsilon); } +#endif /******************************* CvPoint and variants ***********************************/ @@ -868,10 +948,23 @@ typedef struct CvPoint int x; int y; -#ifdef __cplusplus +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvPoint() __attribute__(( warning("Non-initialized variable") )) {} + template CvPoint(const std::initializer_list<_Tp> list) + { + CV_Assert(list.size() == 0 || list.size() == 2); + x = y = 0; + if (list.size() == 2) + { + x = list.begin()[0]; y = list.begin()[1]; + } + }; +#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) CvPoint(int _x = 0, int _y = 0): x(_x), y(_y) {} template CvPoint(const cv::Point_<_Tp>& pt): x((int)pt.x), y((int)pt.y) {} +#endif +#ifdef __cplusplus template operator cv::Point_<_Tp>() const { return cv::Point_<_Tp>(cv::saturate_cast<_Tp>(x), cv::saturate_cast<_Tp>(y)); } #endif @@ -881,24 +974,39 @@ CvPoint; /** constructs CvPoint structure. */ CV_INLINE CvPoint cvPoint( int x, int y ) { - CvPoint p; - - p.x = x; - p.y = y; - +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvPoint p = {x, y}; +#else + CvPoint p(x, y); +#endif return p; } - +#ifdef __cplusplus +CV_INLINE CvPoint cvPoint(const cv::Point& pt) { return cvPoint(pt.x, pt.y); } +#endif typedef struct CvPoint2D32f { float x; float y; -#ifdef __cplusplus +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvPoint2D32f() __attribute__(( warning("Non-initialized variable") )) {} + template CvPoint2D32f(const std::initializer_list<_Tp> list) + { + CV_Assert(list.size() == 0 || list.size() == 2); + x = y = 0; + if (list.size() == 2) + { + x = list.begin()[0]; y = list.begin()[1]; + } + }; +#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) CvPoint2D32f(float _x = 0, float _y = 0): x(_x), y(_y) {} template CvPoint2D32f(const cv::Point_<_Tp>& pt): x((float)pt.x), y((float)pt.y) {} +#endif +#ifdef __cplusplus template operator cv::Point_<_Tp>() const { return cv::Point_<_Tp>(cv::saturate_cast<_Tp>(x), cv::saturate_cast<_Tp>(y)); } #endif @@ -908,13 +1016,26 @@ CvPoint2D32f; /** constructs CvPoint2D32f structure. */ CV_INLINE CvPoint2D32f cvPoint2D32f( double x, double y ) { - CvPoint2D32f p; - - p.x = (float)x; - p.y = (float)y; +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvPoint2D32f p = { (float)x, (float)y }; +#else + CvPoint2D32f p((float)x, (float)y); +#endif + return p; +} +#ifdef __cplusplus +template +CvPoint2D32f cvPoint2D32f(const cv::Point_<_Tp>& pt) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvPoint2D32f p = { (float)pt.x, (float)pt.y }; +#else + CvPoint2D32f p((float)pt.x, (float)pt.y); +#endif return p; } +#endif /** converts CvPoint to CvPoint2D32f. */ CV_INLINE CvPoint2D32f cvPointTo32f( CvPoint point ) @@ -925,10 +1046,11 @@ CV_INLINE CvPoint2D32f cvPointTo32f( CvPoint point ) /** converts CvPoint2D32f to CvPoint. */ CV_INLINE CvPoint cvPointFrom32f( CvPoint2D32f point ) { - CvPoint ipt; - ipt.x = cvRound(point.x); - ipt.y = cvRound(point.y); - +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvPoint ipt = { cvRound(point.x), cvRound(point.y) }; +#else + CvPoint ipt(cvRound(point.x), cvRound(point.y)); +#endif return ipt; } @@ -939,10 +1061,23 @@ typedef struct CvPoint3D32f float y; float z; -#ifdef __cplusplus +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvPoint3D32f() __attribute__(( warning("Non-initialized variable") )) {} + template CvPoint3D32f(const std::initializer_list<_Tp> list) + { + CV_Assert(list.size() == 0 || list.size() == 3); + x = y = z = 0; + if (list.size() == 3) + { + x = list.begin()[0]; y = list.begin()[1]; z = list.begin()[2]; + } + }; +#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) CvPoint3D32f(float _x = 0, float _y = 0, float _z = 0): x(_x), y(_y), z(_z) {} template CvPoint3D32f(const cv::Point3_<_Tp>& pt): x((float)pt.x), y((float)pt.y), z((float)pt.z) {} +#endif +#ifdef __cplusplus template operator cv::Point3_<_Tp>() const { return cv::Point3_<_Tp>(cv::saturate_cast<_Tp>(x), cv::saturate_cast<_Tp>(y), cv::saturate_cast<_Tp>(z)); } #endif @@ -952,31 +1087,51 @@ CvPoint3D32f; /** constructs CvPoint3D32f structure. */ CV_INLINE CvPoint3D32f cvPoint3D32f( double x, double y, double z ) { - CvPoint3D32f p; - - p.x = (float)x; - p.y = (float)y; - p.z = (float)z; +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvPoint3D32f p = { (float)x, (float)y, (float)z }; +#else + CvPoint3D32f p((float)x, (float)y, (float)z); +#endif + return p; +} +#ifdef __cplusplus +template +CvPoint3D32f cvPoint3D32f(const cv::Point3_<_Tp>& pt) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvPoint3D32f p = { (float)pt.x, (float)pt.y, (float)pt.z }; +#else + CvPoint3D32f p((float)pt.x, (float)pt.y, (float)pt.z); +#endif return p; } +#endif typedef struct CvPoint2D64f { double x; double y; +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvPoint2D64f() __attribute__(( warning("Non-initialized variable") )) {} + template CvPoint2D64f(const std::initializer_list<_Tp> list) + { + CV_Assert(list.size() == 0 || list.size() == 2); + x = y = 0; + if (list.size() == 2) + { + x = list.begin()[0]; y = list.begin()[1]; + } + }; +#endif } CvPoint2D64f; /** constructs CvPoint2D64f structure.*/ CV_INLINE CvPoint2D64f cvPoint2D64f( double x, double y ) { - CvPoint2D64f p; - - p.x = x; - p.y = y; - + CvPoint2D64f p = { x, y }; return p; } @@ -986,18 +1141,25 @@ typedef struct CvPoint3D64f double x; double y; double z; +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvPoint3D64f() __attribute__(( warning("Non-initialized variable") )) {} + template CvPoint3D64f(const std::initializer_list<_Tp> list) + { + CV_Assert(list.size() == 0 || list.size() == 3); + x = y = z = 0; + if (list.size() == 3) + { + x = list.begin()[0]; y = list.begin()[1]; z = list.begin()[2]; + } + }; +#endif } CvPoint3D64f; /** constructs CvPoint3D64f structure. */ CV_INLINE CvPoint3D64f cvPoint3D64f( double x, double y, double z ) { - CvPoint3D64f p; - - p.x = x; - p.y = y; - p.z = z; - + CvPoint3D64f p = { x, y, z }; return p; } @@ -1009,10 +1171,23 @@ typedef struct CvSize int width; int height; -#ifdef __cplusplus +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvSize() __attribute__(( warning("Non-initialized variable") )) {} + template CvSize(const std::initializer_list<_Tp> list) + { + CV_Assert(list.size() == 0 || list.size() == 2); + width = 0; height = 0; + if (list.size() == 2) + { + width = list.begin()[0]; height = list.begin()[1]; + } + }; +#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) CvSize(int w = 0, int h = 0): width(w), height(h) {} template CvSize(const cv::Size_<_Tp>& sz): width(cv::saturate_cast(sz.width)), height(cv::saturate_cast(sz.height)) {} +#endif +#ifdef __cplusplus template operator cv::Size_<_Tp>() const { return cv::Size_<_Tp>(cv::saturate_cast<_Tp>(width), cv::saturate_cast<_Tp>(height)); } #endif @@ -1022,23 +1197,48 @@ CvSize; /** constructs CvSize structure. */ CV_INLINE CvSize cvSize( int width, int height ) { - CvSize s; - - s.width = width; - s.height = height; +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvSize s = { width, height }; +#else + CvSize s(width, height); +#endif + return s; +} +#ifdef __cplusplus +CV_INLINE CvSize cvSize(const cv::Size& sz) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvSize s = { sz.width, sz.height }; +#else + CvSize s(sz.width, sz.height); +#endif return s; } +#endif typedef struct CvSize2D32f { float width; float height; -#ifdef __cplusplus +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvSize2D32f() __attribute__(( warning("Non-initialized variable") )) {} + template CvSize2D32f(const std::initializer_list<_Tp> list) + { + CV_Assert(list.size() == 0 || list.size() == 2); + width = 0; height = 0; + if (list.size() == 2) + { + width = list.begin()[0]; height = list.begin()[1]; + } + }; +#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) CvSize2D32f(float w = 0, float h = 0): width(w), height(h) {} template CvSize2D32f(const cv::Size_<_Tp>& sz): width(cv::saturate_cast(sz.width)), height(cv::saturate_cast(sz.height)) {} +#endif +#ifdef __cplusplus template operator cv::Size_<_Tp>() const { return cv::Size_<_Tp>(cv::saturate_cast<_Tp>(width), cv::saturate_cast<_Tp>(height)); } #endif @@ -1048,13 +1248,25 @@ CvSize2D32f; /** constructs CvSize2D32f structure. */ CV_INLINE CvSize2D32f cvSize2D32f( double width, double height ) { - CvSize2D32f s; - - s.width = (float)width; - s.height = (float)height; - +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvSize2D32f s = { (float)width, (float)height }; +#else + CvSize2D32f s((float)width, (float)height); +#endif + return s; +} +#ifdef __cplusplus +template +CvSize2D32f cvSize2D32f(const cv::Size_<_Tp>& sz) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvSize2D32f s = { (float)sz.width, (float)sz.height }; +#else + CvSize2D32f s((float)sz.width, (float)sz.height); +#endif return s; } +#endif /** @sa RotatedRect */ @@ -1065,15 +1277,37 @@ typedef struct CvBox2D float angle; /**< Angle between the horizontal axis */ /**< and the first side (i.e. length) in degrees */ -#ifdef __cplusplus +#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) CvBox2D(CvPoint2D32f c = CvPoint2D32f(), CvSize2D32f s = CvSize2D32f(), float a = 0) : center(c), size(s), angle(a) {} CvBox2D(const cv::RotatedRect& rr) : center(rr.center), size(rr.size), angle(rr.angle) {} +#endif +#ifdef __cplusplus operator cv::RotatedRect() const { return cv::RotatedRect(center, size, angle); } #endif } CvBox2D; +#ifdef __cplusplus +CV_INLINE CvBox2D cvBox2D(CvPoint2D32f c = CvPoint2D32f(), CvSize2D32f s = CvSize2D32f(), float a = 0) +{ + CvBox2D self; + self.center = c; + self.size = s; + self.angle = a; + return self; +} +CV_INLINE CvBox2D cvBox2D(const cv::RotatedRect& rr) +{ + CvBox2D self; + self.center = cvPoint2D32f(rr.center); + self.size = cvSize2D32f(rr.size); + self.angle = rr.angle; + return self; +} +#endif + + /** Line iterator state: */ typedef struct CvLineIterator { @@ -1099,7 +1333,19 @@ typedef struct CvSlice { int start_index, end_index; -#if defined(__cplusplus) && !defined(__CUDACC__) +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvSlice() __attribute__(( warning("Non-initialized variable") )) {} + template CvSlice(const std::initializer_list<_Tp> list) + { + CV_Assert(list.size() == 0 || list.size() == 2); + start_index = end_index = 0; + if (list.size() == 2) + { + start_index = list.begin()[0]; end_index = list.begin()[1]; + } + }; +#endif +#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) && !defined(__CUDACC__) CvSlice(int start = 0, int end = 0) : start_index(start), end_index(end) {} CvSlice(const cv::Range& r) { *this = (r.start != INT_MIN && r.end != INT_MAX) ? CvSlice(r.start, r.end) : CvSlice(0, CV_WHOLE_SEQ_END_INDEX); } operator cv::Range() const { return (start_index == 0 && end_index == CV_WHOLE_SEQ_END_INDEX ) ? cv::Range::all() : cv::Range(start_index, end_index); } @@ -1109,13 +1355,21 @@ CvSlice; CV_INLINE CvSlice cvSlice( int start, int end ) { - CvSlice slice; - slice.start_index = start; - slice.end_index = end; - +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) && !defined(__CUDACC__)) + CvSlice slice = { start, end }; +#else + CvSlice slice(start, end); +#endif return slice; } +#if defined(__cplusplus) +CV_INLINE CvSlice cvSlice(const cv::Range& r) +{ + CvSlice slice = (r.start != INT_MIN && r.end != INT_MAX) ? cvSlice(r.start, r.end) : cvSlice(0, CV_WHOLE_SEQ_END_INDEX); + return slice; +} +#endif /************************************* CvScalar *****************************************/ @@ -1125,13 +1379,22 @@ typedef struct CvScalar { double val[4]; -#ifdef __cplusplus +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvScalar() __attribute__(( warning("Non-initialized variable") )) {} + CvScalar(const std::initializer_list list) + { + CV_Assert(list.size() == 0 || list.size() == 4); + val[0] = val[1] = val[2] = val[3] = 0; + if (list.size() == 4) + { + val[0] = list.begin()[0]; val[1] = list.begin()[1]; val[2] = list.begin()[2]; val[3] = list.begin()[3]; + } + }; +#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) CvScalar() {} CvScalar(double d0, double d1 = 0, double d2 = 0, double d3 = 0) { val[0] = d0; val[1] = d1; val[2] = d2; val[3] = d3; } template CvScalar(const cv::Scalar_<_Tp>& s) { val[0] = s.val[0]; val[1] = s.val[1]; val[2] = s.val[2]; val[3] = s.val[3]; } - template - operator cv::Scalar_<_Tp>() const { return cv::Scalar_<_Tp>(cv::saturate_cast<_Tp>(val[0]), cv::saturate_cast<_Tp>(val[1]), cv::saturate_cast<_Tp>(val[2]), cv::saturate_cast<_Tp>(val[3])); } template CvScalar(const cv::Vec<_Tp, cn>& v) { @@ -1140,22 +1403,59 @@ typedef struct CvScalar for( ; i < 4; i++ ) val[i] = 0; } #endif +#ifdef __cplusplus + template + operator cv::Scalar_<_Tp>() const { return cv::Scalar_<_Tp>(cv::saturate_cast<_Tp>(val[0]), cv::saturate_cast<_Tp>(val[1]), cv::saturate_cast<_Tp>(val[2]), cv::saturate_cast<_Tp>(val[3])); } +#endif } CvScalar; CV_INLINE CvScalar cvScalar( double val0, double val1 CV_DEFAULT(0), double val2 CV_DEFAULT(0), double val3 CV_DEFAULT(0)) { +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvScalar scalar = CV_STRUCT_INITIALIZER; +#else CvScalar scalar; +#endif scalar.val[0] = val0; scalar.val[1] = val1; scalar.val[2] = val2; scalar.val[3] = val3; return scalar; } +#ifdef __cplusplus +CV_INLINE CvScalar cvScalar() +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvScalar scalar = CV_STRUCT_INITIALIZER; +#else + CvScalar scalar; +#endif + scalar.val[0] = scalar.val[1] = scalar.val[2] = scalar.val[3] = 0; + return scalar; +} +CV_INLINE CvScalar cvScalar(const cv::Scalar& s) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvScalar scalar = CV_STRUCT_INITIALIZER; +#else + CvScalar scalar; +#endif + scalar.val[0] = s.val[0]; + scalar.val[1] = s.val[1]; + scalar.val[2] = s.val[2]; + scalar.val[3] = s.val[3]; + return scalar; +} +#endif CV_INLINE CvScalar cvRealScalar( double val0 ) { +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvScalar scalar = CV_STRUCT_INITIALIZER; +#else CvScalar scalar; +#endif scalar.val[0] = val0; scalar.val[1] = scalar.val[2] = scalar.val[3] = 0; return scalar; @@ -1163,7 +1463,11 @@ CV_INLINE CvScalar cvRealScalar( double val0 ) CV_INLINE CvScalar cvScalarAll( double val0123 ) { +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvScalar scalar = CV_STRUCT_INITIALIZER; +#else CvScalar scalar; +#endif scalar.val[0] = val0123; scalar.val[1] = val0123; scalar.val[2] = val0123; @@ -1216,7 +1520,7 @@ typedef struct CvSeqBlock { struct CvSeqBlock* prev; /**< Previous sequence block. */ struct CvSeqBlock* next; /**< Next sequence block. */ - int start_index; /**< Index of the first element in the block + */ + int start_index; /**< Index of the first element in the block + */ /**< sequence->first->start_index. */ int count; /**< Number of elements in the block. */ schar* data; /**< Pointer to the first element of the block. */ @@ -1361,7 +1665,7 @@ CvGraph; /** @} */ -/*********************************** Chain/Countour *************************************/ +/*********************************** Chain/Contour *************************************/ typedef struct CvChain { @@ -1403,7 +1707,7 @@ typedef CvContour CvPoint2DSeq; #define CV_SEQ_ELTYPE_POINT CV_32SC2 /**< (x,y) */ #define CV_SEQ_ELTYPE_CODE CV_8UC1 /**< freeman code: 0..7 */ #define CV_SEQ_ELTYPE_GENERIC 0 -#define CV_SEQ_ELTYPE_PTR CV_USRTYPE1 +#define CV_SEQ_ELTYPE_PTR CV_MAKE_TYPE(CV_8U, 8 /*sizeof(void*)*/) #define CV_SEQ_ELTYPE_PPOINT CV_SEQ_ELTYPE_PTR /**< &(x,y) */ #define CV_SEQ_ELTYPE_INDEX CV_32SC1 /**< #(x,y) */ #define CV_SEQ_ELTYPE_GRAPH_EDGE 0 /**< &next_o, &next_d, &vtx_o, &vtx_d */ @@ -1655,6 +1959,8 @@ CvSeqReader; * Data structures for persistence (a.k.a serialization) functionality * \****************************************************************************************/ +#if 0 + /** "black box" file storage */ typedef struct CvFileStorage CvFileStorage; @@ -1669,6 +1975,9 @@ typedef struct CvFileStorage CvFileStorage; #define CV_STORAGE_FORMAT_AUTO 0 #define CV_STORAGE_FORMAT_XML 8 #define CV_STORAGE_FORMAT_YAML 16 +#define CV_STORAGE_FORMAT_JSON 24 +#define CV_STORAGE_BASE64 64 +#define CV_STORAGE_WRITE_BASE64 (CV_STORAGE_BASE64 | CV_STORAGE_WRITE) /** @brief List of attributes. : @@ -1738,7 +2047,7 @@ typedef struct CvString } CvString; -/** All the keys (names) of elements in the readed file storage +/** All the keys (names) of elements in the read file storage are stored in the hash to speed up the lookup operations: */ typedef struct CvStringHashNode { @@ -1804,31 +2113,10 @@ typedef struct CvTypeInfo CvCloneFunc clone; /**< creates a copy of the object */ } CvTypeInfo; - - -/**** System data types ******/ - -typedef struct CvPluginFuncInfo -{ - void** func_addr; - void* default_func_addr; - const char* func_names; - int search_modules; - int loaded_from; -} -CvPluginFuncInfo; - -typedef struct CvModuleInfo -{ - struct CvModuleInfo* next; - const char* name; - const char* version; - CvPluginFuncInfo* func_tab; -} -CvModuleInfo; +#endif /** @} */ -#endif /*__OPENCV_CORE_TYPES_H__*/ +#endif /*OPENCV_CORE_TYPES_H*/ /* End of file. */ diff --git a/IPL/include/opencv/opencv2/core/utility.hpp b/IPL/include/opencv/opencv2/core/utility.hpp index 1e6249d..3d2a035 100644 --- a/IPL/include/opencv/opencv2/core/utility.hpp +++ b/IPL/include/opencv/opencv2/core/utility.hpp @@ -42,8 +42,8 @@ // //M*/ -#ifndef __OPENCV_CORE_UTILITY_H__ -#define __OPENCV_CORE_UTILITY_H__ +#ifndef OPENCV_CORE_UTILITY_H +#define OPENCV_CORE_UTILITY_H #ifndef __cplusplus # error utility.hpp header must be compiled as C++ @@ -54,34 +54,17 @@ #endif #include "opencv2/core.hpp" +#include -namespace cv -{ +#include -#ifdef CV_COLLECT_IMPL_DATA -CV_EXPORTS void setImpl(int flags); // set implementation flags and reset storage arrays -CV_EXPORTS void addImpl(int flag, const char* func = 0); // add implementation and function name to storage arrays -// Get stored implementation flags and fucntions names arrays -// Each implementation entry correspond to function name entry, so you can find which implementation was executed in which fucntion -CV_EXPORTS int getImpl(std::vector &impl, std::vector &funName); - -CV_EXPORTS bool useCollection(); // return implementation collection state -CV_EXPORTS void setUseCollection(bool flag); // set implementation collection state - -#define CV_IMPL_PLAIN 0x01 // native CPU OpenCV implementation -#define CV_IMPL_OCL 0x02 // OpenCL implementation -#define CV_IMPL_IPP 0x04 // IPP implementation -#define CV_IMPL_MT 0x10 // multithreaded implementation - -#define CV_IMPL_ADD(impl) \ - if(cv::useCollection()) \ - { \ - cv::addImpl(impl, CV_Func); \ - } -#else -#define CV_IMPL_ADD(impl) +#if !defined(_M_CEE) +#include // std::mutex, std::lock_guard #endif +namespace cv +{ + //! @addtogroup core_utils //! @{ @@ -102,7 +85,7 @@ CV_EXPORTS void setUseCollection(bool flag); // set implementation collection st \code void my_func(const cv::Mat& m) { - cv::AutoBuffer buf; // create automatic buffer containing 1000 floats + cv::AutoBuffer buf(1000); // create automatic buffer containing 1000 floats buf.allocate(m.rows); // if m.rows <= 1000, the pre-allocated buffer is used, // otherwise the buffer of "m.rows" floats will be allocated @@ -111,7 +94,11 @@ CV_EXPORTS void setUseCollection(bool flag); // set implementation collection st } \endcode */ +#ifdef OPENCV_ENABLE_MEMORY_SANITIZER +template class AutoBuffer +#else template class AutoBuffer +#endif { public: typedef _Tp value_type; @@ -119,7 +106,7 @@ template class AutoBuffer //! the default constructor AutoBuffer(); //! constructor taking the real buffer size - AutoBuffer(size_t _size); + explicit AutoBuffer(size_t _size); //! the copy constructor AutoBuffer(const AutoBuffer<_Tp, fixed_size>& buf); @@ -137,17 +124,29 @@ template class AutoBuffer void resize(size_t _size); //! returns the current buffer size size_t size() const; - //! returns pointer to the real buffer, stack-allocated or head-allocated - operator _Tp* (); - //! returns read-only pointer to the real buffer, stack-allocated or head-allocated - operator const _Tp* () const; + //! returns pointer to the real buffer, stack-allocated or heap-allocated + inline _Tp* data() { return ptr; } + //! returns read-only pointer to the real buffer, stack-allocated or heap-allocated + inline const _Tp* data() const { return ptr; } + +#if !defined(OPENCV_DISABLE_DEPRECATED_COMPATIBILITY) // use to .data() calls instead + //! returns pointer to the real buffer, stack-allocated or heap-allocated + operator _Tp* () { return ptr; } + //! returns read-only pointer to the real buffer, stack-allocated or heap-allocated + operator const _Tp* () const { return ptr; } +#else + //! returns a reference to the element at specified location. No bounds checking is performed in Release builds. + inline _Tp& operator[] (size_t i) { CV_DbgCheckLT(i, sz, "out of range"); return ptr[i]; } + //! returns a reference to the element at specified location. No bounds checking is performed in Release builds. + inline const _Tp& operator[] (size_t i) const { CV_DbgCheckLT(i, sz, "out of range"); return ptr[i]; } +#endif protected: //! pointer to the real buffer, can point to buf if the buffer is small enough _Tp* ptr; //! size of the real buffer size_t sz; - //! pre-allocated buffer. At least 1 element to confirm C++ standard reqirements + //! pre-allocated buffer. At least 1 element to confirm C++ standard requirements _Tp buf[(fixed_size > 0) ? fixed_size : 1]; }; @@ -177,13 +176,6 @@ extern "C" typedef int (*ErrorCallback)( int status, const char* func_name, */ CV_EXPORTS ErrorCallback redirectError( ErrorCallback errCallback, void* userdata=0, void** prevUserdata=0); -/** @brief Returns a text string formatted using the printf-like expression. - -The function acts like sprintf but forms and returns an STL string. It can be used to form an error -message in the Exception constructor. -@param fmt printf-compatible formatting specifiers. - */ -CV_EXPORTS String format( const char* fmt, ... ); CV_EXPORTS String tempfile( const char* suffix = 0); CV_EXPORTS void glob(String pattern, std::vector& result, bool recursive = false); @@ -193,15 +185,15 @@ If threads == 0, OpenCV will disable threading optimizations and run all it's fu sequentially. Passing threads \< 0 will reset threads number to system default. This function must be called outside of parallel region. -OpenCV will try to run it's functions with specified threads number, but some behaviour differs from +OpenCV will try to run its functions with specified threads number, but some behaviour differs from framework: -- `TBB` – User-defined parallel constructions will run with the same threads number, if - another does not specified. If late on user creates own scheduler, OpenCV will be use it. -- `OpenMP` – No special defined behaviour. -- `Concurrency` – If threads == 1, OpenCV will disable threading optimizations and run it's +- `TBB` - User-defined parallel constructions will run with the same threads number, if + another is not specified. If later on user creates his own scheduler, OpenCV will use it. +- `OpenMP` - No special defined behaviour. +- `Concurrency` - If threads == 1, OpenCV will disable threading optimizations and run its functions sequentially. -- `GCD` – Supports only values \<= 0. -- `C=` – No special defined behaviour. +- `GCD` - Supports only values \<= 0. +- `C=` - No special defined behaviour. @param nthreads Number of threads used by OpenCV. @sa getNumThreads, getThreadNum */ @@ -212,13 +204,13 @@ CV_EXPORTS_W void setNumThreads(int nthreads); Always returns 1 if OpenCV is built without threading support. The exact meaning of return value depends on the threading framework used by OpenCV library: -- `TBB` – The number of threads, that OpenCV will try to use for parallel regions. If there is +- `TBB` - The number of threads, that OpenCV will try to use for parallel regions. If there is any tbb::thread_scheduler_init in user code conflicting with OpenCV, then function returns default number of threads used by TBB library. -- `OpenMP` – An upper bound on the number of threads that could be used to form a new team. -- `Concurrency` – The number of threads, that OpenCV will try to use for parallel regions. -- `GCD` – Unsupported; returns the GCD thread pool limit (512) for compatibility. -- `C=` – The number of threads, that OpenCV will try to use for parallel regions, if before +- `OpenMP` - An upper bound on the number of threads that could be used to form a new team. +- `Concurrency` - The number of threads, that OpenCV will try to use for parallel regions. +- `GCD` - Unsupported; returns the GCD thread pool limit (512) for compatibility. +- `C=` - The number of threads, that OpenCV will try to use for parallel regions, if before called setNumThreads with threads \> 0, otherwise returns the number of logical CPUs, available for the process. @sa setNumThreads, getThreadNum @@ -228,13 +220,15 @@ CV_EXPORTS_W int getNumThreads(); /** @brief Returns the index of the currently executed thread within the current parallel region. Always returns 0 if called outside of parallel region. -The exact meaning of return value depends on the threading framework used by OpenCV library: -- `TBB` – Unsupported with current 4.1 TBB release. May be will be supported in future. -- `OpenMP` – The thread number, within the current team, of the calling thread. -- `Concurrency` – An ID for the virtual processor that the current context is executing on (0 +@deprecated Current implementation doesn't corresponding to this documentation. + +The exact meaning of the return value depends on the threading framework used by OpenCV library: +- `TBB` - Unsupported with current 4.1 TBB release. Maybe will be supported in future. +- `OpenMP` - The thread number, within the current team, of the calling thread. +- `Concurrency` - An ID for the virtual processor that the current context is executing on (0 for master thread and unique number for others, but not necessary 1,2,3,...). -- `GCD` – System calling thread's ID. Never returns 0 inside parallel region. -- `C=` – The index of the current parallel task. +- `GCD` - System calling thread's ID. Never returns 0 inside parallel region. +- `C=` - The index of the current parallel task. @sa setNumThreads, getNumThreads */ CV_EXPORTS_W int getThreadNum(); @@ -247,11 +241,29 @@ architecture. */ CV_EXPORTS_W const String& getBuildInformation(); +/** @brief Returns library version string + +For example "3.4.1-dev". + +@sa getMajorVersion, getMinorVersion, getRevisionVersion +*/ +CV_EXPORTS_W String getVersionString(); + +/** @brief Returns major library version */ +CV_EXPORTS_W int getVersionMajor(); + +/** @brief Returns minor library version */ +CV_EXPORTS_W int getVersionMinor(); + +/** @brief Returns revision field of the library version */ +CV_EXPORTS_W int getVersionRevision(); + /** @brief Returns the number of ticks. The function returns the number of ticks after the certain event (for example, when the machine was turned on). It can be used to initialize RNG or to measure a function execution time by reading the -tick count before and after the function call. See also the tick frequency. +tick count before and after the function call. +@sa getTickFrequency, TickMeter */ CV_EXPORTS_W int64 getTickCount(); @@ -264,9 +276,139 @@ execution time in seconds: // do something ... t = ((double)getTickCount() - t)/getTickFrequency(); @endcode +@sa getTickCount, TickMeter */ CV_EXPORTS_W double getTickFrequency(); +/** @brief a Class to measure passing time. + +The class computes passing time by counting the number of ticks per second. That is, the following code computes the +execution time in seconds: +@code +TickMeter tm; +tm.start(); +// do something ... +tm.stop(); +std::cout << tm.getTimeSec(); +@endcode + +It is also possible to compute the average time over multiple runs: +@code +TickMeter tm; +for (int i = 0; i < 100; i++) +{ + tm.start(); + // do something ... + tm.stop(); +} +double average_time = tm.getTimeSec() / tm.getCounter(); +std::cout << "Average time in second per iteration is: " << average_time << std::endl; +@endcode +@sa getTickCount, getTickFrequency +*/ + +class CV_EXPORTS_W TickMeter +{ +public: + //! the default constructor + CV_WRAP TickMeter() + { + reset(); + } + + /** + starts counting ticks. + */ + CV_WRAP void start() + { + startTime = cv::getTickCount(); + } + + /** + stops counting ticks. + */ + CV_WRAP void stop() + { + int64 time = cv::getTickCount(); + if (startTime == 0) + return; + ++counter; + sumTime += (time - startTime); + startTime = 0; + } + + /** + returns counted ticks. + */ + CV_WRAP int64 getTimeTicks() const + { + return sumTime; + } + + /** + returns passed time in microseconds. + */ + CV_WRAP double getTimeMicro() const + { + return getTimeMilli()*1e3; + } + + /** + returns passed time in milliseconds. + */ + CV_WRAP double getTimeMilli() const + { + return getTimeSec()*1e3; + } + + /** + returns passed time in seconds. + */ + CV_WRAP double getTimeSec() const + { + return (double)getTimeTicks() / getTickFrequency(); + } + + /** + returns internal counter value. + */ + CV_WRAP int64 getCounter() const + { + return counter; + } + + /** + resets internal values. + */ + CV_WRAP void reset() + { + startTime = 0; + sumTime = 0; + counter = 0; + } + +private: + int64 counter; + int64 sumTime; + int64 startTime; +}; + +/** @brief output operator +@code +TickMeter tm; +tm.start(); +// do something ... +tm.stop(); +std::cout << tm; +@endcode +*/ + +static inline +std::ostream& operator << (std::ostream& out, const TickMeter& tm) +{ + return out << tm.getTimeSec() << "sec"; +} + /** @brief Returns the number of CPU ticks. The function returns the current number of CPU ticks on some architectures (such as x86, x64, @@ -291,6 +433,24 @@ in OpenCV. */ CV_EXPORTS_W bool checkHardwareSupport(int feature); +/** @brief Returns feature name by ID + +Returns empty string if feature is not defined +*/ +CV_EXPORTS_W String getHardwareFeatureName(int feature); + +/** @brief Returns list of CPU features enabled during compilation. + +Returned value is a string containing space separated list of CPU features with following markers: + +- no markers - baseline features +- prefix `*` - features enabled in dispatcher +- suffix `?` - features enabled but not available in HW + +Example: `SSE SSE2 SSE3 *SSE4.1 *SSE4.2 *FP16 *AVX *AVX2 *AVX512-SKX?` +*/ +CV_EXPORTS_W std::string getCPUFeaturesLine(); + /** @brief Returns the number of logical CPUs available for the process. */ CV_EXPORTS_W int getNumberOfCPUs(); @@ -305,12 +465,13 @@ The function returns the aligned pointer of the same type as the input pointer: */ template static inline _Tp* alignPtr(_Tp* ptr, int n=(int)sizeof(_Tp)) { + CV_DbgAssert((n & (n - 1)) == 0); // n is a power of 2 return (_Tp*)(((size_t)ptr + n-1) & -n); } /** @brief Aligns a buffer size to the specified number of bytes. -The function returns the minimum number that is greater or equal to sz and is divisible by n : +The function returns the minimum number that is greater than or equal to sz and is divisible by n : \f[\texttt{(sz + n-1) & -n}\f] @param sz Buffer size to align. @param n Alignment size that must be a power of two. @@ -321,9 +482,80 @@ static inline size_t alignSize(size_t sz, int n) return (sz + n-1) & -n; } +/** @brief Integer division with result round up. + +Use this function instead of `ceil((float)a / b)` expressions. + +@sa alignSize +*/ +static inline int divUp(int a, unsigned int b) +{ + CV_DbgAssert(a >= 0); + return (a + b - 1) / b; +} +/** @overload */ +static inline size_t divUp(size_t a, unsigned int b) +{ + return (a + b - 1) / b; +} + +/** @brief Round first value up to the nearest multiple of second value. + +Use this function instead of `ceil((float)a / b) * b` expressions. + +@sa divUp +*/ +static inline int roundUp(int a, unsigned int b) +{ + CV_DbgAssert(a >= 0); + return a + b - 1 - (a + b -1) % b; +} +/** @overload */ +static inline size_t roundUp(size_t a, unsigned int b) +{ + return a + b - 1 - (a + b - 1) % b; +} + +/** @brief Alignment check of passed values + +Usage: `isAligned(...)` + +@note Alignment(N) must be a power of 2 (2**k, 2^k) +*/ +template static inline +bool isAligned(const T& data) +{ + CV_StaticAssert((N & (N - 1)) == 0, ""); // power of 2 + return (((size_t)data) & (N - 1)) == 0; +} +/** @overload */ +template static inline +bool isAligned(const void* p1) +{ + return isAligned((size_t)p1); +} +/** @overload */ +template static inline +bool isAligned(const void* p1, const void* p2) +{ + return isAligned(((size_t)p1)|((size_t)p2)); +} +/** @overload */ +template static inline +bool isAligned(const void* p1, const void* p2, const void* p3) +{ + return isAligned(((size_t)p1)|((size_t)p2)|((size_t)p3)); +} +/** @overload */ +template static inline +bool isAligned(const void* p1, const void* p2, const void* p3, const void* p4) +{ + return isAligned(((size_t)p1)|((size_t)p2)|((size_t)p3)|((size_t)p4)); +} + /** @brief Enables or disables the optimized code. -The function can be used to dynamically turn on and off optimized code (code that uses SSE2, AVX, +The function can be used to dynamically turn on and off optimized dispatched code (code that uses SSE4.2, AVX/AVX2, and other instructions on the platforms that support it). It sets a global flag that is further checked by OpenCV functions. Since the flag is not checked in the inner OpenCV loops, it is only safe to call the function on the very top level in your application where you can be sure that no @@ -342,7 +574,7 @@ The function returns true if the optimized code is enabled. Otherwise, it return */ CV_EXPORTS_W bool useOptimized(); -static inline size_t getElemSize(int type) { return CV_ELEM_SIZE(type); } +static inline size_t getElemSize(int type) { return (size_t)CV_ELEM_SIZE(type); } /////////////////////////////// Parallel Primitives ////////////////////////////////// @@ -359,17 +591,38 @@ class CV_EXPORTS ParallelLoopBody */ CV_EXPORTS void parallel_for_(const Range& range, const ParallelLoopBody& body, double nstripes=-1.); +class ParallelLoopBodyLambdaWrapper : public ParallelLoopBody +{ +private: + std::function m_functor; +public: + ParallelLoopBodyLambdaWrapper(std::function functor) : + m_functor(functor) + { } + + virtual void operator() (const cv::Range& range) const CV_OVERRIDE + { + m_functor(range); + } +}; + +inline void parallel_for_(const Range& range, std::function functor, double nstripes=-1.) +{ + parallel_for_(range, ParallelLoopBodyLambdaWrapper(functor), nstripes); +} + /////////////////////////////// forEach method of cv::Mat //////////////////////////// template inline void Mat::forEach_impl(const Functor& operation) { if (false) { - operation(*reinterpret_cast<_Tp*>(0), reinterpret_cast(NULL)); - // If your compiler fail in this line. + operation(*reinterpret_cast<_Tp*>(0), reinterpret_cast(0)); + // If your compiler fails in this line. // Please check that your functor signature is - // (_Tp&, const int*) <- multidimential - // or (_Tp&, void*) <- in case of you don't need current idx. + // (_Tp&, const int*) <- multi-dimensional + // or (_Tp&, void*) <- in case you don't need current idx. } + CV_Assert(!empty()); CV_Assert(this->total() / this->size[this->dims - 1] <= INT_MAX); const int LINES = static_cast(this->total() / this->size[this->dims - 1]); @@ -377,11 +630,12 @@ void Mat::forEach_impl(const Functor& operation) { { public: PixelOperationWrapper(Mat_<_Tp>* const frame, const Functor& _operation) - : mat(frame), op(_operation) {}; - virtual ~PixelOperationWrapper(){}; + : mat(frame), op(_operation) {} + virtual ~PixelOperationWrapper(){} // ! Overloaded virtual operator // convert range call to row call. - virtual void operator()(const Range &range) const { + virtual void operator()(const Range &range) const CV_OVERRIDE + { const int DIMS = mat->dims; const int COLS = mat->size[DIMS - 1]; if (DIMS <= 2) { @@ -389,7 +643,7 @@ void Mat::forEach_impl(const Functor& operation) { this->rowCall2(row, COLS); } } else { - std::vector idx(COLS); /// idx is modified in this->rowCall + std::vector idx(DIMS); /// idx is modified in this->rowCall idx[DIMS - 2] = range.start - 1; for (int line_num = range.start; line_num < range.end; ++line_num) { @@ -407,7 +661,7 @@ void Mat::forEach_impl(const Functor& operation) { this->rowCall(&idx[0], COLS, DIMS); } } - }; + } private: Mat_<_Tp>* const mat; const Functor op; @@ -444,12 +698,12 @@ void Mat::forEach_impl(const Functor& operation) { op(*pixel++, static_cast(idx)); idx[1]++; } - }; + } PixelOperationWrapper& operator=(const PixelOperationWrapper &) { CV_Assert(false); // We can not remove this implementation because Visual Studio warning C4822. return *this; - }; + } }; parallel_for_(cv::Range(0, LINES), PixelOperationWrapper(reinterpret_cast*>(this), operation)); @@ -457,84 +711,11 @@ void Mat::forEach_impl(const Functor& operation) { /////////////////////////// Synchronization Primitives /////////////////////////////// -class CV_EXPORTS Mutex -{ -public: - Mutex(); - ~Mutex(); - Mutex(const Mutex& m); - Mutex& operator = (const Mutex& m); - - void lock(); - bool trylock(); - void unlock(); - - struct Impl; -protected: - Impl* impl; -}; - -class CV_EXPORTS AutoLock -{ -public: - AutoLock(Mutex& m) : mutex(&m) { mutex->lock(); } - ~AutoLock() { mutex->unlock(); } -protected: - Mutex* mutex; -private: - AutoLock(const AutoLock&); - AutoLock& operator = (const AutoLock&); -}; - -// TLS interface -class CV_EXPORTS TLSDataContainer -{ -protected: - TLSDataContainer(); - virtual ~TLSDataContainer(); - - void gatherData(std::vector &data) const; -#if OPENCV_ABI_COMPATIBILITY > 300 - void* getData() const; - void release(); - -private: -#else - void release(); - -public: - void* getData() const; +#if !defined(_M_CEE) +typedef std::recursive_mutex Mutex; +typedef std::lock_guard AutoLock; #endif - virtual void* createDataInstance() const = 0; - virtual void deleteDataInstance(void* pData) const = 0; - int key_; -}; - -// Main TLS data class -template -class TLSData : protected TLSDataContainer -{ -public: - inline TLSData() {} - inline ~TLSData() { release(); } // Release key and delete associated data - inline T* get() const { return (T*)getData(); } // Get data assosiated with key - - // Get data from all threads - inline void gather(std::vector &data) const - { - std::vector &dataVoid = reinterpret_cast&>(data); - gatherData(dataVoid); - } - -private: - virtual void* createDataInstance() const {return new T;} // Wrapper to allocate data by template - virtual void deleteDataInstance(void* pData) const {delete (T*)pData;} // Wrapper to release data by template - - // Disable TLS copy operations - TLSData(TLSData &) {}; - TLSData& operator =(const TLSData &) {return *this;}; -}; /** @brief Designed for command line parsing @@ -569,7 +750,7 @@ The sample below demonstrates how to use CommandLineParser: ### Keys syntax -The keys parameter is a string containing several blocks, each one is enclosed in curley braces and +The keys parameter is a string containing several blocks, each one is enclosed in curly braces and describes one argument. Each argument contains three parts separated by the `|` symbol: -# argument names is a space-separated list of option synonyms (to mark argument as positional, prefix it with the `@` symbol) @@ -640,7 +821,7 @@ class CV_EXPORTS CommandLineParser This method returns the path to the executable from the command line (`argv[0]`). - For example, if the application has been started with such command: + For example, if the application has been started with such a command: @code{.sh} $ ./bin/my-executable @endcode @@ -727,7 +908,7 @@ class CV_EXPORTS CommandLineParser /** @brief Check for parsing errors - Returns true if error occured while accessing the parameters (bad conversion, missing arguments, + Returns false if error occurred while accessing the parameters (bad conversion, missing arguments, etc.). Call @ref printErrors to print error messages list. */ bool check() const; @@ -746,15 +927,15 @@ class CV_EXPORTS CommandLineParser */ void printMessage() const; - /** @brief Print list of errors occured + /** @brief Print list of errors occurred @sa check */ void printErrors() const; protected: - void getByName(const String& name, bool space_delete, int type, void* dst) const; - void getByIndex(int index, bool space_delete, int type, void* dst) const; + void getByName(const String& name, bool space_delete, Param type, void* dst) const; + void getByIndex(int index, bool space_delete, Param type, void* dst) const; struct Impl; Impl* impl; @@ -817,10 +998,10 @@ AutoBuffer<_Tp, fixed_size>::allocate(size_t _size) return; } deallocate(); + sz = _size; if(_size > fixed_size) { ptr = new _Tp[_size]; - sz = _size; } } @@ -863,31 +1044,166 @@ template inline size_t AutoBuffer<_Tp, fixed_size>::size() const { return sz; } -template inline -AutoBuffer<_Tp, fixed_size>::operator _Tp* () -{ return ptr; } +//! @endcond -template inline -AutoBuffer<_Tp, fixed_size>::operator const _Tp* () const -{ return ptr; } -#ifndef OPENCV_NOSTL -template<> inline std::string CommandLineParser::get(int index, bool space_delete) const +// Basic Node class for tree building +template +class CV_EXPORTS Node { - return get(index, space_delete); -} -template<> inline std::string CommandLineParser::get(const String& name, bool space_delete) const +public: + Node() + { + m_pParent = 0; + } + Node(OBJECT& payload) : m_payload(payload) + { + m_pParent = 0; + } + ~Node() + { + removeChilds(); + if (m_pParent) + { + int idx = m_pParent->findChild(this); + if (idx >= 0) + m_pParent->m_childs.erase(m_pParent->m_childs.begin() + idx); + } + } + + Node* findChild(OBJECT& payload) const + { + for(size_t i = 0; i < this->m_childs.size(); i++) + { + if(this->m_childs[i]->m_payload == payload) + return this->m_childs[i]; + } + return NULL; + } + + int findChild(Node *pNode) const + { + for (size_t i = 0; i < this->m_childs.size(); i++) + { + if(this->m_childs[i] == pNode) + return (int)i; + } + return -1; + } + + void addChild(Node *pNode) + { + if(!pNode) + return; + + CV_Assert(pNode->m_pParent == 0); + pNode->m_pParent = this; + this->m_childs.push_back(pNode); + } + + void removeChilds() + { + for(size_t i = 0; i < m_childs.size(); i++) + { + m_childs[i]->m_pParent = 0; // avoid excessive parent vector trimming + delete m_childs[i]; + } + m_childs.clear(); + } + + int getDepth() + { + int count = 0; + Node *pParent = m_pParent; + while(pParent) count++, pParent = pParent->m_pParent; + return count; + } + +public: + OBJECT m_payload; + Node* m_pParent; + std::vector*> m_childs; +}; + + +namespace samples { + +//! @addtogroup core_utils_samples +// This section describes utility functions for OpenCV samples. +// +// @note Implementation of these utilities is not thread-safe. +// +//! @{ + +/** @brief Try to find requested data file + +Search directories: + +1. Directories passed via `addSamplesDataSearchPath()` +2. OPENCV_SAMPLES_DATA_PATH_HINT environment variable +3. OPENCV_SAMPLES_DATA_PATH environment variable + If parameter value is not empty and nothing is found then stop searching. +4. Detects build/install path based on: + a. current working directory (CWD) + b. and/or binary module location (opencv_core/opencv_world, doesn't work with static linkage) +5. Scan `/{,data,samples/data}` directories if build directory is detected or the current directory is in source tree. +6. Scan `/share/OpenCV` directory if install directory is detected. + +@see cv::utils::findDataFile + +@param relative_path Relative path to data file +@param required Specify "file not found" handling. + If true, function prints information message and raises cv::Exception. + If false, function returns empty result +@param silentMode Disables messages +@return Returns path (absolute or relative to the current directory) or empty string if file is not found +*/ +CV_EXPORTS_W cv::String findFile(const cv::String& relative_path, bool required = true, bool silentMode = false); + +CV_EXPORTS_W cv::String findFileOrKeep(const cv::String& relative_path, bool silentMode = false); + +inline cv::String findFileOrKeep(const cv::String& relative_path, bool silentMode) { - return get(name, space_delete); + cv::String res = findFile(relative_path, false, silentMode); + if (res.empty()) + return relative_path; + return res; } -#endif // OPENCV_NOSTL -//! @endcond +/** @brief Override search data path by adding new search location + +Use this only to override default behavior +Passed paths are used in LIFO order. + +@param path Path to used samples data +*/ +CV_EXPORTS_W void addSamplesDataSearchPath(const cv::String& path); + +/** @brief Append samples search data sub directory + +General usage is to add OpenCV modules name (`/modules//samples/data` -> `/samples/data` + `modules//samples/data`). +Passed subdirectories are used in LIFO order. + +@param subdir samples data sub directory +*/ +CV_EXPORTS_W void addSamplesDataSearchSubDirectory(const cv::String& subdir); + +//! @} +} // namespace samples + +namespace utils { + +CV_EXPORTS int getThreadID(); + +} // namespace } //namespace cv -#ifndef DISABLE_OPENCV_24_COMPATIBILITY -#include "opencv2/core/core_c.h" +#ifdef CV_COLLECT_IMPL_DATA +#include "opencv2/core/utils/instrumentation.hpp" +#else +/// Collect implementation data on OpenCV function call. Requires ENABLE_IMPL_COLLECTION build option. +#define CV_IMPL_ADD(impl) #endif -#endif //__OPENCV_CORE_UTILITY_H__ +#endif //OPENCV_CORE_UTILITY_H diff --git a/IPL/include/opencv/opencv2/core/utils/allocator_stats.hpp b/IPL/include/opencv/opencv2/core/utils/allocator_stats.hpp new file mode 100644 index 0000000..79e9338 --- /dev/null +++ b/IPL/include/opencv/opencv2/core/utils/allocator_stats.hpp @@ -0,0 +1,29 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_ALLOCATOR_STATS_HPP +#define OPENCV_CORE_ALLOCATOR_STATS_HPP + +#include "../cvdef.h" + +namespace cv { namespace utils { + +class AllocatorStatisticsInterface +{ +protected: + AllocatorStatisticsInterface() {} + virtual ~AllocatorStatisticsInterface() {} +public: + virtual uint64_t getCurrentUsage() const = 0; + virtual uint64_t getTotalUsage() const = 0; + virtual uint64_t getNumberOfAllocations() const = 0; + virtual uint64_t getPeakUsage() const = 0; + + /** set peak usage = current usage */ + virtual void resetPeakUsage() = 0; +}; + +}} // namespace + +#endif // OPENCV_CORE_ALLOCATOR_STATS_HPP diff --git a/IPL/include/opencv/opencv2/core/utils/allocator_stats.impl.hpp b/IPL/include/opencv/opencv2/core/utils/allocator_stats.impl.hpp new file mode 100644 index 0000000..61fcf15 --- /dev/null +++ b/IPL/include/opencv/opencv2/core/utils/allocator_stats.impl.hpp @@ -0,0 +1,150 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_ALLOCATOR_STATS_IMPL_HPP +#define OPENCV_CORE_ALLOCATOR_STATS_IMPL_HPP + +#include "./allocator_stats.hpp" + +#ifdef CV_CXX11 +#include +#endif + +//#define OPENCV_DISABLE_ALLOCATOR_STATS + +namespace cv { namespace utils { + +#ifndef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE +#if defined(__GNUC__) && (\ + (defined(__SIZEOF_POINTER__) && __SIZEOF_POINTER__ == 4) || \ + (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) && !defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8)) \ + ) +#define OPENCV_ALLOCATOR_STATS_COUNTER_TYPE int +#endif +#endif + +#ifndef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE +#define OPENCV_ALLOCATOR_STATS_COUNTER_TYPE long long +#endif + +#ifdef CV__ALLOCATOR_STATS_LOG +namespace { +#endif + +class AllocatorStatistics : public AllocatorStatisticsInterface +{ +#ifdef OPENCV_DISABLE_ALLOCATOR_STATS + +public: + AllocatorStatistics() {} + ~AllocatorStatistics() CV_OVERRIDE {} + + uint64_t getCurrentUsage() const CV_OVERRIDE { return 0; } + uint64_t getTotalUsage() const CV_OVERRIDE { return 0; } + uint64_t getNumberOfAllocations() const CV_OVERRIDE { return 0; } + uint64_t getPeakUsage() const CV_OVERRIDE { return 0; } + + /** set peak usage = current usage */ + void resetPeakUsage() CV_OVERRIDE {}; + + void onAllocate(size_t /*sz*/) {} + void onFree(size_t /*sz*/) {} + +#elif defined(CV_CXX11) + +protected: + typedef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE counter_t; + std::atomic curr, total, total_allocs, peak; +public: + AllocatorStatistics() {} + ~AllocatorStatistics() CV_OVERRIDE {} + + uint64_t getCurrentUsage() const CV_OVERRIDE { return (uint64_t)curr.load(); } + uint64_t getTotalUsage() const CV_OVERRIDE { return (uint64_t)total.load(); } + uint64_t getNumberOfAllocations() const CV_OVERRIDE { return (uint64_t)total_allocs.load(); } + uint64_t getPeakUsage() const CV_OVERRIDE { return (uint64_t)peak.load(); } + + /** set peak usage = current usage */ + void resetPeakUsage() CV_OVERRIDE { peak.store(curr.load()); } + + // Controller interface + void onAllocate(size_t sz) + { +#ifdef CV__ALLOCATOR_STATS_LOG + CV__ALLOCATOR_STATS_LOG(cv::format("allocate: %lld (curr=%lld)", (long long int)sz, (long long int)curr.load())); +#endif + + counter_t new_curr = curr.fetch_add((counter_t)sz) + (counter_t)sz; + + // peak = std::max((uint64_t)peak, new_curr); + auto prev_peak = peak.load(); + while (prev_peak < new_curr) + { + if (peak.compare_exchange_weak(prev_peak, new_curr)) + break; + } + // end of peak = max(...) + + total += (counter_t)sz; + total_allocs++; + } + void onFree(size_t sz) + { +#ifdef CV__ALLOCATOR_STATS_LOG + CV__ALLOCATOR_STATS_LOG(cv::format("free: %lld (curr=%lld)", (long long int)sz, (long long int)curr.load())); +#endif + curr -= (counter_t)sz; + } + +#else // non C++11 + +protected: + typedef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE counter_t; + volatile counter_t curr, total, total_allocs, peak; // overflow is possible, CV_XADD operates with 'int' only +public: + AllocatorStatistics() + : curr(0), total(0), total_allocs(0), peak(0) + {} + ~AllocatorStatistics() CV_OVERRIDE {} + + uint64_t getCurrentUsage() const CV_OVERRIDE { return (uint64_t)curr; } + uint64_t getTotalUsage() const CV_OVERRIDE { return (uint64_t)total; } + uint64_t getNumberOfAllocations() const CV_OVERRIDE { return (uint64_t)total_allocs; } + uint64_t getPeakUsage() const CV_OVERRIDE { return (uint64_t)peak; } + + void resetPeakUsage() CV_OVERRIDE { peak = curr; } + + // Controller interface + void onAllocate(size_t sz) + { +#ifdef CV__ALLOCATOR_STATS_LOG + CV__ALLOCATOR_STATS_LOG(cv::format("allocate: %lld (curr=%lld)", (long long int)sz, (long long int)curr)); +#endif + + counter_t new_curr = (counter_t)CV_XADD(&curr, (counter_t)sz) + (counter_t)sz; + + peak = std::max((counter_t)peak, new_curr); // non-thread safe + + //CV_XADD(&total, (uint64_t)sz); // overflow with int, non-reliable... + total += sz; + + CV_XADD(&total_allocs, (counter_t)1); + } + void onFree(size_t sz) + { +#ifdef CV__ALLOCATOR_STATS_LOG + CV__ALLOCATOR_STATS_LOG(cv::format("free: %lld (curr=%lld)", (long long int)sz, (long long int)curr)); +#endif + CV_XADD(&curr, (counter_t)-sz); + } +#endif +}; + +#ifdef CV__ALLOCATOR_STATS_LOG +} // namespace +#endif + +}} // namespace + +#endif // OPENCV_CORE_ALLOCATOR_STATS_IMPL_HPP diff --git a/IPL/include/opencv/opencv2/core/utils/filesystem.hpp b/IPL/include/opencv/opencv2/core/utils/filesystem.hpp new file mode 100644 index 0000000..a98d220 --- /dev/null +++ b/IPL/include/opencv/opencv2/core/utils/filesystem.hpp @@ -0,0 +1,82 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_UTILS_FILESYSTEM_HPP +#define OPENCV_UTILS_FILESYSTEM_HPP + +namespace cv { namespace utils { namespace fs { + + +CV_EXPORTS bool exists(const cv::String& path); +CV_EXPORTS bool isDirectory(const cv::String& path); + +CV_EXPORTS void remove_all(const cv::String& path); + + +CV_EXPORTS cv::String getcwd(); + +/** @brief Converts path p to a canonical absolute path + * Symlinks are processed if there is support for them on running platform. + * + * @param path input path. Target file/directory should exist. + */ +CV_EXPORTS cv::String canonical(const cv::String& path); + +/** Join path components */ +CV_EXPORTS cv::String join(const cv::String& base, const cv::String& path); + +/** Get parent directory */ +CV_EXPORTS cv::String getParent(const cv::String &path); +CV_EXPORTS std::wstring getParent(const std::wstring& path); + +/** + * Generate a list of all files that match the globbing pattern. + * + * Result entries are prefixed by base directory path. + * + * @param directory base directory + * @param pattern filter pattern (based on '*'/'?' symbols). Use empty string to disable filtering and return all results + * @param[out] result result of globing. + * @param recursive scan nested directories too + * @param includeDirectories include directories into results list + */ +CV_EXPORTS void glob(const cv::String& directory, const cv::String& pattern, + CV_OUT std::vector& result, + bool recursive = false, bool includeDirectories = false); + +/** + * Generate a list of all files that match the globbing pattern. + * + * @param directory base directory + * @param pattern filter pattern (based on '*'/'?' symbols). Use empty string to disable filtering and return all results + * @param[out] result globbing result with relative paths from base directory + * @param recursive scan nested directories too + * @param includeDirectories include directories into results list + */ +CV_EXPORTS void glob_relative(const cv::String& directory, const cv::String& pattern, + CV_OUT std::vector& result, + bool recursive = false, bool includeDirectories = false); + + +CV_EXPORTS bool createDirectory(const cv::String& path); +CV_EXPORTS bool createDirectories(const cv::String& path); + +#ifdef __OPENCV_BUILD +// TODO +//CV_EXPORTS cv::String getTempDirectory(); + +/** + * @brief Returns directory to store OpenCV cache files + * Create sub-directory in common OpenCV cache directory if it doesn't exist. + * @param sub_directory_name name of sub-directory. NULL or "" value asks to return root cache directory. + * @param configuration_name optional name of configuration parameter name which overrides default behavior. + * @return Path to cache directory. Returns empty string if cache directories support is not available. Returns "disabled" if cache disabled by user. + */ +CV_EXPORTS cv::String getCacheDirectory(const char* sub_directory_name, const char* configuration_name = NULL); + +#endif + +}}} // namespace + +#endif // OPENCV_UTILS_FILESYSTEM_HPP diff --git a/IPL/include/opencv/opencv2/core/utils/instrumentation.hpp b/IPL/include/opencv/opencv2/core/utils/instrumentation.hpp new file mode 100644 index 0000000..3639867 --- /dev/null +++ b/IPL/include/opencv/opencv2/core/utils/instrumentation.hpp @@ -0,0 +1,125 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_UTILS_INSTR_HPP +#define OPENCV_UTILS_INSTR_HPP + +#include +#include + +namespace cv { + +//! @addtogroup core_utils +//! @{ + +#ifdef CV_COLLECT_IMPL_DATA +CV_EXPORTS void setImpl(int flags); // set implementation flags and reset storage arrays +CV_EXPORTS void addImpl(int flag, const char* func = 0); // add implementation and function name to storage arrays +// Get stored implementation flags and functions names arrays +// Each implementation entry correspond to function name entry, so you can find which implementation was executed in which function +CV_EXPORTS int getImpl(std::vector &impl, std::vector &funName); + +CV_EXPORTS bool useCollection(); // return implementation collection state +CV_EXPORTS void setUseCollection(bool flag); // set implementation collection state + +#define CV_IMPL_PLAIN 0x01 // native CPU OpenCV implementation +#define CV_IMPL_OCL 0x02 // OpenCL implementation +#define CV_IMPL_IPP 0x04 // IPP implementation +#define CV_IMPL_MT 0x10 // multithreaded implementation + +#undef CV_IMPL_ADD +#define CV_IMPL_ADD(impl) \ + if(cv::useCollection()) \ + { \ + cv::addImpl(impl, CV_Func); \ + } +#endif + +// Instrumentation external interface +namespace instr +{ + +#if !defined OPENCV_ABI_CHECK + +enum TYPE +{ + TYPE_GENERAL = 0, // OpenCV API function, e.g. exported function + TYPE_MARKER, // Information marker + TYPE_WRAPPER, // Wrapper function for implementation + TYPE_FUN, // Simple function call +}; + +enum IMPL +{ + IMPL_PLAIN = 0, + IMPL_IPP, + IMPL_OPENCL, +}; + +struct NodeDataTls +{ + NodeDataTls() + { + m_ticksTotal = 0; + } + uint64 m_ticksTotal; +}; + +class CV_EXPORTS NodeData +{ +public: + NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, void* retAddress = NULL, bool alwaysExpand = false, cv::instr::TYPE instrType = TYPE_GENERAL, cv::instr::IMPL implType = IMPL_PLAIN); + NodeData(NodeData &ref); + ~NodeData(); + NodeData& operator=(const NodeData&); + + cv::String m_funName; + cv::instr::TYPE m_instrType; + cv::instr::IMPL m_implType; + const char* m_fileName; + int m_lineNum; + void* m_retAddress; + bool m_alwaysExpand; + bool m_funError; + + volatile int m_counter; + volatile uint64 m_ticksTotal; + TLSDataAccumulator m_tls; + int m_threads; + + // No synchronization + double getTotalMs() const { return ((double)m_ticksTotal / cv::getTickFrequency()) * 1000; } + double getMeanMs() const { return (((double)m_ticksTotal/m_counter) / cv::getTickFrequency()) * 1000; } +}; +bool operator==(const NodeData& lhs, const NodeData& rhs); + +typedef Node InstrNode; + +CV_EXPORTS InstrNode* getTrace(); + +#endif // !defined OPENCV_ABI_CHECK + + +CV_EXPORTS bool useInstrumentation(); +CV_EXPORTS void setUseInstrumentation(bool flag); +CV_EXPORTS void resetTrace(); + +enum FLAGS +{ + FLAGS_NONE = 0, + FLAGS_MAPPING = 0x01, + FLAGS_EXPAND_SAME_NAMES = 0x02, +}; + +CV_EXPORTS void setFlags(FLAGS modeFlags); +static inline void setFlags(int modeFlags) { setFlags((FLAGS)modeFlags); } +CV_EXPORTS FLAGS getFlags(); + +} // namespace instr + +//! @} + +} // namespace + +#endif // OPENCV_UTILS_TLS_HPP diff --git a/IPL/include/opencv/opencv2/core/utils/logger.defines.hpp b/IPL/include/opencv/opencv2/core/utils/logger.defines.hpp new file mode 100644 index 0000000..7d73f02 --- /dev/null +++ b/IPL/include/opencv/opencv2/core/utils/logger.defines.hpp @@ -0,0 +1,42 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_LOGGER_DEFINES_HPP +#define OPENCV_LOGGER_DEFINES_HPP + +//! @addtogroup core_logging +//! @{ + +// Supported logging levels and their semantic +#define CV_LOG_LEVEL_SILENT 0 //!< for using in setLogLevel() call +#define CV_LOG_LEVEL_FATAL 1 //!< Fatal (critical) error (unrecoverable internal error) +#define CV_LOG_LEVEL_ERROR 2 //!< Error message +#define CV_LOG_LEVEL_WARN 3 //!< Warning message +#define CV_LOG_LEVEL_INFO 4 //!< Info message +#define CV_LOG_LEVEL_DEBUG 5 //!< Debug message. Disabled in the "Release" build. +#define CV_LOG_LEVEL_VERBOSE 6 //!< Verbose (trace) messages. Requires verbosity level. Disabled in the "Release" build. + +namespace cv { +namespace utils { +namespace logging { + +//! Supported logging levels and their semantic +enum LogLevel { + LOG_LEVEL_SILENT = 0, //!< for using in setLogVevel() call + LOG_LEVEL_FATAL = 1, //!< Fatal (critical) error (unrecoverable internal error) + LOG_LEVEL_ERROR = 2, //!< Error message + LOG_LEVEL_WARNING = 3, //!< Warning message + LOG_LEVEL_INFO = 4, //!< Info message + LOG_LEVEL_DEBUG = 5, //!< Debug message. Disabled in the "Release" build. + LOG_LEVEL_VERBOSE = 6, //!< Verbose (trace) messages. Requires verbosity level. Disabled in the "Release" build. +#ifndef CV_DOXYGEN + ENUM_LOG_LEVEL_FORCE_INT = INT_MAX +#endif +}; + +}}} // namespace + +//! @} + +#endif // OPENCV_LOGGER_DEFINES_HPP diff --git a/IPL/include/opencv/opencv2/core/utils/logger.hpp b/IPL/include/opencv/opencv2/core/utils/logger.hpp new file mode 100644 index 0000000..accb860 --- /dev/null +++ b/IPL/include/opencv/opencv2/core/utils/logger.hpp @@ -0,0 +1,218 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_LOGGER_HPP +#define OPENCV_LOGGER_HPP + +#include +#include +#include // INT_MAX + +#include "logger.defines.hpp" +#include "logtag.hpp" + +namespace cv { +namespace utils { +namespace logging { + +//! @addtogroup core_logging +//! @{ + +/** Set global logging level +@return previous logging level +*/ +CV_EXPORTS LogLevel setLogLevel(LogLevel logLevel); +/** Get global logging level */ +CV_EXPORTS LogLevel getLogLevel(); + +CV_EXPORTS void registerLogTag(cv::utils::logging::LogTag* plogtag); + +CV_EXPORTS void setLogTagLevel(const char* tag, cv::utils::logging::LogLevel level); + +CV_EXPORTS cv::utils::logging::LogLevel getLogTagLevel(const char* tag); + +namespace internal { + +/** Get global log tag */ +CV_EXPORTS cv::utils::logging::LogTag* getGlobalLogTag(); + +/** Write log message */ +CV_EXPORTS void writeLogMessage(LogLevel logLevel, const char* message); + +/** Write log message */ +CV_EXPORTS void writeLogMessageEx(LogLevel logLevel, const char* tag, const char* file, int line, const char* func, const char* message); + +} // namespace + +struct LogTagAuto + : public LogTag +{ + inline LogTagAuto(const char* _name, LogLevel _level) + : LogTag(_name, _level) + { + registerLogTag(this); + } +}; + +/** + * \def CV_LOG_STRIP_LEVEL + * + * Define CV_LOG_STRIP_LEVEL=CV_LOG_LEVEL_[DEBUG|INFO|WARN|ERROR|FATAL|SILENT] to compile out anything at that and before that logging level + */ +#ifndef CV_LOG_STRIP_LEVEL +# if defined NDEBUG +# define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG +# else +# define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_VERBOSE +# endif +#endif + +#define CV_LOGTAG_PTR_CAST(expr) static_cast(expr) + +// CV_LOGTAG_EXPAND_NAME is intended to be re-defined (undef and then define again) +// to allows logging users to use a shorter name argument when calling +// CV_LOG_WITH_TAG or its related macros such as CV_LOG_INFO. +// +// This macro is intended to modify the tag argument as a string (token), via +// preprocessor token pasting or metaprogramming techniques. A typical usage +// is to apply a prefix, such as +// ...... #define CV_LOGTAG_EXPAND_NAME(tag) cv_logtag_##tag +// +// It is permitted to re-define to a hard-coded expression, ignoring the tag. +// This would work identically like the CV_LOGTAG_FALLBACK macro. +// +// Important: When the logging macro is called with tag being NULL, a user-defined +// CV_LOGTAG_EXPAND_NAME may expand it into cv_logtag_0, cv_logtag_NULL, or +// cv_logtag_nullptr. Use with care. Also be mindful of C++ symbol redefinitions. +// +// If there is significant amount of logging code with tag being NULL, it is +// recommended to use (re-define) CV_LOGTAG_FALLBACK to inject locally a default +// tag at the beginning of a compilation unit, to minimize lines of code changes. +// +#define CV_LOGTAG_EXPAND_NAME(tag) tag + +// CV_LOGTAG_FALLBACK is intended to be re-defined (undef and then define again) +// by any other compilation units to provide a log tag when the logging statement +// does not specify one. The macro needs to expand into a C++ expression that can +// be static_cast into (cv::utils::logging::LogTag*). Null (nullptr) is permitted. +#define CV_LOGTAG_FALLBACK nullptr + +// CV_LOGTAG_GLOBAL is the tag used when a log tag is not specified in the logging +// statement nor the compilation unit. The macro needs to expand into a C++ +// expression that can be static_cast into (cv::utils::logging::LogTag*). Must be +// non-null. Do not re-define. +#define CV_LOGTAG_GLOBAL cv::utils::logging::internal::getGlobalLogTag() + +#define CV_LOG_WITH_TAG(tag, msgLevel, extra_check0, extra_check1, ...) \ + for(;;) { \ + extra_check0; \ + const auto cv_temp_msglevel = (cv::utils::logging::LogLevel)(msgLevel); \ + if (cv_temp_msglevel >= (CV_LOG_STRIP_LEVEL)) break; \ + auto cv_temp_logtagptr = CV_LOGTAG_PTR_CAST(CV_LOGTAG_EXPAND_NAME(tag)); \ + if (!cv_temp_logtagptr) cv_temp_logtagptr = CV_LOGTAG_PTR_CAST(CV_LOGTAG_FALLBACK); \ + if (!cv_temp_logtagptr) cv_temp_logtagptr = CV_LOGTAG_PTR_CAST(CV_LOGTAG_GLOBAL); \ + if (cv_temp_logtagptr && (cv_temp_msglevel > cv_temp_logtagptr->level)) break; \ + extra_check1; \ + std::stringstream cv_temp_logstream; \ + cv_temp_logstream << __VA_ARGS__; \ + cv::utils::logging::internal::writeLogMessageEx( \ + cv_temp_msglevel, \ + (cv_temp_logtagptr ? cv_temp_logtagptr->name : nullptr), \ + __FILE__, \ + __LINE__, \ + CV_Func, \ + cv_temp_logstream.str().c_str()); \ + break; \ + } + +#define CV_LOG_FATAL(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_FATAL, , , __VA_ARGS__) +#define CV_LOG_ERROR(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_ERROR, , , __VA_ARGS__) +#define CV_LOG_WARNING(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_WARNING, , , __VA_ARGS__) +#define CV_LOG_INFO(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_INFO, , , __VA_ARGS__) +#define CV_LOG_DEBUG(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_DEBUG, , , __VA_ARGS__) +#define CV_LOG_VERBOSE(tag, v, ...) CV_LOG_WITH_TAG(tag, (cv::utils::logging::LOG_LEVEL_VERBOSE + (int)(v)), , , __VA_ARGS__) + +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_INFO +#undef CV_LOG_INFO +#define CV_LOG_INFO(tag, ...) +#endif + +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_DEBUG +#undef CV_LOG_DEBUG +#define CV_LOG_DEBUG(tag, ...) +#endif + +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_VERBOSE +#undef CV_LOG_VERBOSE +#define CV_LOG_VERBOSE(tag, v, ...) +#endif + +//! @cond IGNORED +#define CV__LOG_ONCE_CHECK_PRE \ + static bool _cv_log_once_ ## __LINE__ = false; \ + if (_cv_log_once_ ## __LINE__) break; + +#define CV__LOG_ONCE_CHECK_POST \ + _cv_log_once_ ## __LINE__ = true; + +#define CV__LOG_IF_CHECK(logging_cond) \ + if (!(logging_cond)) break; + +//! @endcond + + +// CV_LOG_ONCE_XXX macros + +#define CV_LOG_ONCE_ERROR(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_ERROR, CV__LOG_ONCE_CHECK_PRE, CV__LOG_ONCE_CHECK_POST, __VA_ARGS__) +#define CV_LOG_ONCE_WARNING(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_WARNING, CV__LOG_ONCE_CHECK_PRE, CV__LOG_ONCE_CHECK_POST, __VA_ARGS__) +#define CV_LOG_ONCE_INFO(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_INFO, CV__LOG_ONCE_CHECK_PRE, CV__LOG_ONCE_CHECK_POST, __VA_ARGS__) +#define CV_LOG_ONCE_DEBUG(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_DEBUG, CV__LOG_ONCE_CHECK_PRE, CV__LOG_ONCE_CHECK_POST, __VA_ARGS__) +#define CV_LOG_ONCE_VERBOSE(tag, v, ...) CV_LOG_WITH_TAG(tag, (cv::utils::logging::LOG_LEVEL_VERBOSE + (int)(v)), CV__LOG_ONCE_CHECK_PRE, CV__LOG_ONCE_CHECK_POST, __VA_ARGS__) + +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_INFO +#undef CV_LOG_ONCE_INFO +#define CV_LOG_ONCE_INFO(tag, ...) +#endif + +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_DEBUG +#undef CV_LOG_ONCE_DEBUG +#define CV_LOG_ONCE_DEBUG(tag, ...) +#endif + +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_VERBOSE +#undef CV_LOG_ONCE_VERBOSE +#define CV_LOG_ONCE_VERBOSE(tag, v, ...) +#endif + + +// CV_LOG_IF_XXX macros + +#define CV_LOG_IF_FATAL(tag, logging_cond, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_FATAL, , CV__LOG_IF_CHECK(logging_cond), __VA_ARGS__) +#define CV_LOG_IF_ERROR(tag, logging_cond, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_ERROR, , CV__LOG_IF_CHECK(logging_cond), __VA_ARGS__) +#define CV_LOG_IF_WARNING(tag, logging_cond, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_WARNING, , CV__LOG_IF_CHECK(logging_cond), __VA_ARGS__) +#define CV_LOG_IF_INFO(tag, logging_cond, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_INFO, , CV__LOG_IF_CHECK(logging_cond), __VA_ARGS__) +#define CV_LOG_IF_DEBUG(tag, logging_cond, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_DEBUG, , CV__LOG_IF_CHECK(logging_cond), __VA_ARGS__) +#define CV_LOG_IF_VERBOSE(tag, v, logging_cond, ...) CV_LOG_WITH_TAG(tag, (cv::utils::logging::LOG_LEVEL_VERBOSE + (int)(v)), , CV__LOG_IF_CHECK(logging_cond), __VA_ARGS__) + +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_INFO +#undef CV_LOG_IF_INFO +#define CV_LOG_IF_INFO(tag, logging_cond, ...) +#endif + +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_DEBUG +#undef CV_LOG_IF_DEBUG +#define CV_LOG_IF_DEBUG(tag, logging_cond, ...) +#endif + +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_VERBOSE +#undef CV_LOG_IF_VERBOSE +#define CV_LOG_IF_VERBOSE(tag, v, logging_cond, ...) +#endif + + +//! @} + +}}} // namespace + +#endif // OPENCV_LOGGER_HPP diff --git a/IPL/include/opencv/opencv2/core/utils/logtag.hpp b/IPL/include/opencv/opencv2/core/utils/logtag.hpp new file mode 100644 index 0000000..4089720 --- /dev/null +++ b/IPL/include/opencv/opencv2/core/utils/logtag.hpp @@ -0,0 +1,28 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_LOGTAG_HPP +#define OPENCV_CORE_LOGTAG_HPP + +#include "opencv2/core/cvstd.hpp" +#include "logger.defines.hpp" + +namespace cv { +namespace utils { +namespace logging { + +struct LogTag +{ + const char* name; + LogLevel level; + + inline LogTag(const char* _name, LogLevel _level) + : name(_name) + , level(_level) + {} +}; + +}}} + +#endif diff --git a/IPL/include/opencv/opencv2/core/utils/tls.hpp b/IPL/include/opencv/opencv2/core/utils/tls.hpp new file mode 100644 index 0000000..697a7b0 --- /dev/null +++ b/IPL/include/opencv/opencv2/core/utils/tls.hpp @@ -0,0 +1,233 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_UTILS_TLS_HPP +#define OPENCV_UTILS_TLS_HPP + +#include + +namespace cv { + +//! @addtogroup core_utils +//! @{ + +namespace details { class TlsStorage; } + +/** TLS container base implementation + * + * Don't use directly. + * + * @sa TLSData, TLSDataAccumulator templates + */ +class CV_EXPORTS TLSDataContainer +{ +protected: + TLSDataContainer(); + virtual ~TLSDataContainer(); + + /// @deprecated use detachData() instead + void gatherData(std::vector &data) const; + /// get TLS data and detach all data from threads (similar to cleanup() call) + void detachData(std::vector& data); + + void* getData() const; + void release(); + +protected: + virtual void* createDataInstance() const = 0; + virtual void deleteDataInstance(void* pData) const = 0; + +private: + int key_; + + friend class cv::details::TlsStorage; // core/src/system.cpp + +public: + void cleanup(); //!< Release created TLS data container objects. It is similar to release() call, but it keeps TLS container valid. + +private: + // Disable copy/assign (noncopyable pattern) + TLSDataContainer(TLSDataContainer &) = delete; + TLSDataContainer& operator =(const TLSDataContainer &) = delete; +}; + + +/** @brief Simple TLS data class + * + * @sa TLSDataAccumulator + */ +template +class TLSData : protected TLSDataContainer +{ +public: + inline TLSData() {} + inline ~TLSData() { release(); } + + inline T* get() const { return (T*)getData(); } //!< Get data associated with key + inline T& getRef() const { T* ptr = (T*)getData(); CV_DbgAssert(ptr); return *ptr; } //!< Get data associated with key + + /// Release associated thread data + inline void cleanup() + { + TLSDataContainer::cleanup(); + } + +protected: + /// Wrapper to allocate data by template + virtual void* createDataInstance() const CV_OVERRIDE { return new T; } + /// Wrapper to release data by template + virtual void deleteDataInstance(void* pData) const CV_OVERRIDE { delete (T*)pData; } +}; + + +/// TLS data accumulator with gathering methods +template +class TLSDataAccumulator : public TLSData +{ + mutable cv::Mutex mutex; + mutable std::vector dataFromTerminatedThreads; + std::vector detachedData; + bool cleanupMode; +public: + TLSDataAccumulator() : cleanupMode(false) {} + ~TLSDataAccumulator() + { + release(); + } + + /** @brief Get data from all threads + * @deprecated replaced by detachData() + * + * Lifetime of vector data is valid until next detachData()/cleanup()/release() calls + * + * @param[out] data result buffer (should be empty) + */ + void gather(std::vector &data) const + { + CV_Assert(cleanupMode == false); // state is not valid + CV_Assert(data.empty()); + { + std::vector &dataVoid = reinterpret_cast&>(data); + TLSDataContainer::gatherData(dataVoid); + } + { + AutoLock lock(mutex); + data.reserve(data.size() + dataFromTerminatedThreads.size()); + for (typename std::vector::const_iterator i = dataFromTerminatedThreads.begin(); i != dataFromTerminatedThreads.end(); ++i) + { + data.push_back((T*)*i); + } + } + } + + /** @brief Get and detach data from all threads + * + * Call cleanupDetachedData() when returned vector is not needed anymore. + * + * @return Vector with associated data. Content is preserved (including lifetime of attached data pointers) until next detachData()/cleanupDetachedData()/cleanup()/release() calls + */ + std::vector& detachData() + { + CV_Assert(cleanupMode == false); // state is not valid + std::vector dataVoid; + { + TLSDataContainer::detachData(dataVoid); + } + { + AutoLock lock(mutex); + detachedData.reserve(dataVoid.size() + dataFromTerminatedThreads.size()); + for (typename std::vector::const_iterator i = dataFromTerminatedThreads.begin(); i != dataFromTerminatedThreads.end(); ++i) + { + detachedData.push_back((T*)*i); + } + dataFromTerminatedThreads.clear(); + for (typename std::vector::const_iterator i = dataVoid.begin(); i != dataVoid.end(); ++i) + { + detachedData.push_back((T*)(void*)*i); + } + } + dataVoid.clear(); + return detachedData; + } + + /// Release associated thread data returned by detachData() call + void cleanupDetachedData() + { + AutoLock lock(mutex); + cleanupMode = true; + _cleanupDetachedData(); + cleanupMode = false; + } + + /// Release associated thread data + void cleanup() + { + cleanupMode = true; + TLSDataContainer::cleanup(); + + AutoLock lock(mutex); + _cleanupDetachedData(); + _cleanupTerminatedData(); + cleanupMode = false; + } + + /// Release associated thread data and free TLS key + void release() + { + cleanupMode = true; + TLSDataContainer::release(); + { + AutoLock lock(mutex); + _cleanupDetachedData(); + _cleanupTerminatedData(); + } + } + +protected: + // synchronized + void _cleanupDetachedData() + { + for (typename std::vector::iterator i = detachedData.begin(); i != detachedData.end(); ++i) + { + deleteDataInstance((T*)*i); + } + detachedData.clear(); + } + + // synchronized + void _cleanupTerminatedData() + { + for (typename std::vector::iterator i = dataFromTerminatedThreads.begin(); i != dataFromTerminatedThreads.end(); ++i) + { + deleteDataInstance((T*)*i); + } + dataFromTerminatedThreads.clear(); + } + +protected: + virtual void* createDataInstance() const CV_OVERRIDE + { + // Note: we can collect all allocated data here, but this would require raced mutex locks + return new T; + } + virtual void deleteDataInstance(void* pData) const CV_OVERRIDE + { + if (cleanupMode) + { + delete (T*)pData; + } + else + { + AutoLock lock(mutex); + dataFromTerminatedThreads.push_back((T*)pData); + } + } +}; + + +//! @} + +} // namespace + +#endif // OPENCV_UTILS_TLS_HPP diff --git a/IPL/include/opencv/opencv2/core/utils/trace.hpp b/IPL/include/opencv/opencv2/core/utils/trace.hpp new file mode 100644 index 0000000..ef5d35b --- /dev/null +++ b/IPL/include/opencv/opencv2/core/utils/trace.hpp @@ -0,0 +1,252 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_TRACE_HPP +#define OPENCV_TRACE_HPP + +#include + +namespace cv { +namespace utils { +namespace trace { + +//! @addtogroup core_logging +//! @{ + +//! Macro to trace function +#define CV_TRACE_FUNCTION() + +#define CV_TRACE_FUNCTION_SKIP_NESTED() + +//! Trace code scope. +//! @note Dynamic names are not supported in this macro (on stack or heap). Use string literals here only, like "initialize". +#define CV_TRACE_REGION(name_as_static_string_literal) +//! mark completed of the current opened region and create new one +//! @note Dynamic names are not supported in this macro (on stack or heap). Use string literals here only, like "step1". +#define CV_TRACE_REGION_NEXT(name_as_static_string_literal) + +//! Macro to trace argument value +#define CV_TRACE_ARG(arg_id) + +//! Macro to trace argument value (expanded version) +#define CV_TRACE_ARG_VALUE(arg_id, arg_name, value) + +//! @cond IGNORED +#define CV_TRACE_NS cv::utils::trace + +#if !defined(OPENCV_DISABLE_TRACE) && defined(__EMSCRIPTEN__) +#define OPENCV_DISABLE_TRACE 1 +#endif + +namespace details { + +#ifndef __OPENCV_TRACE +# if defined __OPENCV_BUILD && !defined __OPENCV_TESTS && !defined __OPENCV_APPS +# define __OPENCV_TRACE 1 +# else +# define __OPENCV_TRACE 0 +# endif +#endif + +#ifndef CV_TRACE_FILENAME +# define CV_TRACE_FILENAME __FILE__ +#endif + +#ifndef CV__TRACE_FUNCTION +# if defined _MSC_VER +# define CV__TRACE_FUNCTION __FUNCSIG__ +# elif defined __GNUC__ +# define CV__TRACE_FUNCTION __PRETTY_FUNCTION__ +# else +# define CV__TRACE_FUNCTION "" +# endif +#endif + +//! Thread-local instance (usually allocated on stack) +class CV_EXPORTS Region +{ +public: + struct LocationExtraData; + struct LocationStaticStorage + { + LocationExtraData** ppExtra; //< implementation specific data + const char* name; //< region name (function name or other custom name) + const char* filename; //< source code filename + int line; //< source code line + int flags; //< flags (implementation code path: Plain, IPP, OpenCL) + }; + + Region(const LocationStaticStorage& location); + inline ~Region() + { + if (implFlags != 0) + destroy(); + CV_DbgAssert(implFlags == 0); + CV_DbgAssert(pImpl == NULL); + } + + class Impl; + Impl* pImpl; // NULL if current region is not active + int implFlags; // see RegionFlag, 0 if region is ignored + + bool isActive() const { return pImpl != NULL; } + + void destroy(); +private: + Region(const Region&); // disabled + Region& operator= (const Region&); // disabled +}; + +//! Specify region flags +enum RegionLocationFlag { + REGION_FLAG_FUNCTION = (1 << 0), //< region is function (=1) / nested named region (=0) + REGION_FLAG_APP_CODE = (1 << 1), //< region is Application code (=1) / OpenCV library code (=0) + REGION_FLAG_SKIP_NESTED = (1 << 2), //< avoid processing of nested regions + + REGION_FLAG_IMPL_IPP = (1 << 16), //< region is part of IPP code path + REGION_FLAG_IMPL_OPENCL = (2 << 16), //< region is part of OpenCL code path + REGION_FLAG_IMPL_OPENVX = (3 << 16), //< region is part of OpenVX code path + + REGION_FLAG_IMPL_MASK = (15 << 16), + + REGION_FLAG_REGION_FORCE = (1 << 30), + REGION_FLAG_REGION_NEXT = (1 << 31), //< close previous region (see #CV_TRACE_REGION_NEXT macro) + + ENUM_REGION_FLAG_FORCE_INT = INT_MAX +}; + +struct CV_EXPORTS TraceArg { +public: + struct ExtraData; + ExtraData** ppExtra; + const char* name; + int flags; +}; +/** @brief Add meta information to current region (function) + * See CV_TRACE_ARG macro + * @param arg argument information structure (global static cache) + * @param value argument value (can by dynamic string literal in case of string, static allocation is not required) + */ +CV_EXPORTS void traceArg(const TraceArg& arg, const char* value); +//! @overload +CV_EXPORTS void traceArg(const TraceArg& arg, int value); +//! @overload +CV_EXPORTS void traceArg(const TraceArg& arg, int64 value); +//! @overload +CV_EXPORTS void traceArg(const TraceArg& arg, double value); + +#define CV__TRACE_LOCATION_VARNAME(loc_id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_trace_location_, loc_id), __LINE__) +#define CV__TRACE_LOCATION_EXTRA_VARNAME(loc_id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_trace_location_extra_, loc_id) , __LINE__) + +#define CV__TRACE_DEFINE_LOCATION_(loc_id, name, flags) \ + static CV_TRACE_NS::details::Region::LocationExtraData* CV__TRACE_LOCATION_EXTRA_VARNAME(loc_id) = 0; \ + static const CV_TRACE_NS::details::Region::LocationStaticStorage \ + CV__TRACE_LOCATION_VARNAME(loc_id) = { &(CV__TRACE_LOCATION_EXTRA_VARNAME(loc_id)), name, CV_TRACE_FILENAME, __LINE__, flags}; + +#define CV__TRACE_DEFINE_LOCATION_FN(name, flags) CV__TRACE_DEFINE_LOCATION_(fn, name, ((flags) | CV_TRACE_NS::details::REGION_FLAG_FUNCTION)) + + +#define CV__TRACE_OPENCV_FUNCTION() \ + CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, 0); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + +#define CV__TRACE_OPENCV_FUNCTION_NAME(name) \ + CV__TRACE_DEFINE_LOCATION_FN(name, 0); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + +#define CV__TRACE_APP_FUNCTION() \ + CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, CV_TRACE_NS::details::REGION_FLAG_APP_CODE); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + +#define CV__TRACE_APP_FUNCTION_NAME(name) \ + CV__TRACE_DEFINE_LOCATION_FN(name, CV_TRACE_NS::details::REGION_FLAG_APP_CODE); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + + +#define CV__TRACE_OPENCV_FUNCTION_SKIP_NESTED() \ + CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, CV_TRACE_NS::details::REGION_FLAG_SKIP_NESTED); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + +#define CV__TRACE_OPENCV_FUNCTION_NAME_SKIP_NESTED(name) \ + CV__TRACE_DEFINE_LOCATION_FN(name, CV_TRACE_NS::details::REGION_FLAG_SKIP_NESTED); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + +#define CV__TRACE_APP_FUNCTION_SKIP_NESTED() \ + CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, CV_TRACE_NS::details::REGION_FLAG_SKIP_NESTED | CV_TRACE_NS::details::REGION_FLAG_APP_CODE); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + + +#define CV__TRACE_REGION_(name_as_static_string_literal, flags) \ + CV__TRACE_DEFINE_LOCATION_(region, name_as_static_string_literal, flags); \ + CV_TRACE_NS::details::Region CVAUX_CONCAT(__region_, __LINE__)(CV__TRACE_LOCATION_VARNAME(region)); + +#define CV__TRACE_REGION(name_as_static_string_literal) CV__TRACE_REGION_(name_as_static_string_literal, 0) +#define CV__TRACE_REGION_NEXT(name_as_static_string_literal) CV__TRACE_REGION_(name_as_static_string_literal, CV_TRACE_NS::details::REGION_FLAG_REGION_NEXT) + +#define CV__TRACE_ARG_VARNAME(arg_id) CVAUX_CONCAT(__cv_trace_arg_ ## arg_id, __LINE__) +#define CV__TRACE_ARG_EXTRA_VARNAME(arg_id) CVAUX_CONCAT(__cv_trace_arg_extra_ ## arg_id, __LINE__) + +#define CV__TRACE_DEFINE_ARG_(arg_id, name, flags) \ + static CV_TRACE_NS::details::TraceArg::ExtraData* CV__TRACE_ARG_EXTRA_VARNAME(arg_id) = 0; \ + static const CV_TRACE_NS::details::TraceArg \ + CV__TRACE_ARG_VARNAME(arg_id) = { &(CV__TRACE_ARG_EXTRA_VARNAME(arg_id)), name, flags }; + +#define CV__TRACE_ARG_VALUE(arg_id, arg_name, value) \ + CV__TRACE_DEFINE_ARG_(arg_id, arg_name, 0); \ + CV_TRACE_NS::details::traceArg((CV__TRACE_ARG_VARNAME(arg_id)), value); + +#define CV__TRACE_ARG(arg_id) CV_TRACE_ARG_VALUE(arg_id, #arg_id, (arg_id)) + +} // namespace + +#ifndef OPENCV_DISABLE_TRACE +#undef CV_TRACE_FUNCTION +#undef CV_TRACE_FUNCTION_SKIP_NESTED +#if __OPENCV_TRACE +#define CV_TRACE_FUNCTION CV__TRACE_OPENCV_FUNCTION +#define CV_TRACE_FUNCTION_SKIP_NESTED CV__TRACE_OPENCV_FUNCTION_SKIP_NESTED +#else +#define CV_TRACE_FUNCTION CV__TRACE_APP_FUNCTION +#define CV_TRACE_FUNCTION_SKIP_NESTED CV__TRACE_APP_FUNCTION_SKIP_NESTED +#endif + +#undef CV_TRACE_REGION +#define CV_TRACE_REGION CV__TRACE_REGION + +#undef CV_TRACE_REGION_NEXT +#define CV_TRACE_REGION_NEXT CV__TRACE_REGION_NEXT + +#undef CV_TRACE_ARG_VALUE +#define CV_TRACE_ARG_VALUE(arg_id, arg_name, value) \ + if (__region_fn.isActive()) \ + { \ + CV__TRACE_ARG_VALUE(arg_id, arg_name, value); \ + } + +#undef CV_TRACE_ARG +#define CV_TRACE_ARG CV__TRACE_ARG + +#endif // OPENCV_DISABLE_TRACE + +#ifdef OPENCV_TRACE_VERBOSE +#define CV_TRACE_FUNCTION_VERBOSE CV_TRACE_FUNCTION +#define CV_TRACE_REGION_VERBOSE CV_TRACE_REGION +#define CV_TRACE_REGION_NEXT_VERBOSE CV_TRACE_REGION_NEXT +#define CV_TRACE_ARG_VALUE_VERBOSE CV_TRACE_ARG_VALUE +#define CV_TRACE_ARG_VERBOSE CV_TRACE_ARG +#else +#define CV_TRACE_FUNCTION_VERBOSE(...) +#define CV_TRACE_REGION_VERBOSE(...) +#define CV_TRACE_REGION_NEXT_VERBOSE(...) +#define CV_TRACE_ARG_VALUE_VERBOSE(...) +#define CV_TRACE_ARG_VERBOSE(...) +#endif + +//! @endcond + +//! @} + +}}} // namespace + +#endif // OPENCV_TRACE_HPP diff --git a/IPL/include/opencv/opencv2/core/va_intel.hpp b/IPL/include/opencv/opencv2/core/va_intel.hpp index f4bb8a6..f665470 100644 --- a/IPL/include/opencv/opencv2/core/va_intel.hpp +++ b/IPL/include/opencv/opencv2/core/va_intel.hpp @@ -5,8 +5,8 @@ // Copyright (C) 2015, Itseez, Inc., all rights reserved. // Third party copyrights are property of their respective owners. -#ifndef __OPENCV_CORE_VA_INTEL_HPP__ -#define __OPENCV_CORE_VA_INTEL_HPP__ +#ifndef OPENCV_CORE_VA_INTEL_HPP +#define OPENCV_CORE_VA_INTEL_HPP #ifndef __cplusplus # error va_intel.hpp header must be compiled as C++ @@ -31,8 +31,9 @@ This section describes Intel VA-API/OpenCL (CL-VA) interoperability. To enable CL-VA interoperability support, configure OpenCV using CMake with WITH_VA_INTEL=ON . Currently VA-API is supported on Linux only. You should also install Intel Media Server Studio (MSS) to use this feature. You may -have to specify the path(s) to MSS components for cmake in environment variables: VA_INTEL_MSDK_ROOT for Media SDK -(default is "/opt/intel/mediasdk"), and VA_INTEL_IOCL_ROOT for Intel OpenCL (default is "/opt/intel/opencl"). +have to specify the path(s) to MSS components for cmake in environment variables: + +- VA_INTEL_IOCL_ROOT for Intel OpenCL (default is "/opt/intel/opencl"). To use CL-VA interoperability you should first create VADisplay (libva), and then call initializeContextFromVA() function to create OpenCL context and set up interoperability. @@ -74,4 +75,4 @@ CV_EXPORTS void convertFromVASurface(VADisplay display, VASurfaceID surface, Siz }} // namespace cv::va_intel -#endif /* __OPENCV_CORE_VA_INTEL_HPP__ */ +#endif /* OPENCV_CORE_VA_INTEL_HPP */ diff --git a/IPL/include/opencv/opencv2/core/version.hpp b/IPL/include/opencv/opencv2/core/version.hpp index a69d42f..0ef883e 100644 --- a/IPL/include/opencv/opencv2/core/version.hpp +++ b/IPL/include/opencv/opencv2/core/version.hpp @@ -1,64 +1,19 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// Intel License Agreement -// For Open Source Computer Vision Library -// -// Copyright( C) 2000-2015, Intel Corporation, all rights reserved. -// Copyright (C) 2011-2013, NVIDIA Corporation, all rights reserved. -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Copyright (C) 2015, Itseez Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of Intel Corporation may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -//(including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort(including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. -/* - definition of the current version of OpenCV - Usefull to test in user programs -*/ +#ifndef OPENCV_VERSION_HPP +#define OPENCV_VERSION_HPP -#ifndef __OPENCV_VERSION_HPP__ -#define __OPENCV_VERSION_HPP__ - -#define CV_VERSION_MAJOR 3 -#define CV_VERSION_MINOR 1 +#define CV_VERSION_MAJOR 4 +#define CV_VERSION_MINOR 3 #define CV_VERSION_REVISION 0 #define CV_VERSION_STATUS "-dev" #define CVAUX_STR_EXP(__A) #__A #define CVAUX_STR(__A) CVAUX_STR_EXP(__A) -#define CVAUX_STRW_EXP(__A) L#__A +#define CVAUX_STRW_EXP(__A) L ## #__A #define CVAUX_STRW(__A) CVAUX_STRW_EXP(__A) #define CV_VERSION CVAUX_STR(CV_VERSION_MAJOR) "." CVAUX_STR(CV_VERSION_MINOR) "." CVAUX_STR(CV_VERSION_REVISION) CV_VERSION_STATUS @@ -68,4 +23,4 @@ #define CV_MINOR_VERSION CV_VERSION_MINOR #define CV_SUBMINOR_VERSION CV_VERSION_REVISION -#endif +#endif // OPENCV_VERSION_HPP diff --git a/IPL/include/opencv/opencv2/core/vsx_utils.hpp b/IPL/include/opencv/opencv2/core/vsx_utils.hpp new file mode 100644 index 0000000..08ae890 --- /dev/null +++ b/IPL/include/opencv/opencv2/core/vsx_utils.hpp @@ -0,0 +1,1040 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#ifndef OPENCV_HAL_VSX_UTILS_HPP +#define OPENCV_HAL_VSX_UTILS_HPP + +#include "opencv2/core/cvdef.h" + +#ifndef SKIP_INCLUDES +# include +#endif + +//! @addtogroup core_utils_vsx +//! @{ +#if CV_VSX + +#define __VSX_S16__(c, v) (c){v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v} +#define __VSX_S8__(c, v) (c){v, v, v, v, v, v, v, v} +#define __VSX_S4__(c, v) (c){v, v, v, v} +#define __VSX_S2__(c, v) (c){v, v} + +typedef __vector unsigned char vec_uchar16; +#define vec_uchar16_set(...) (vec_uchar16){__VA_ARGS__} +#define vec_uchar16_sp(c) (__VSX_S16__(vec_uchar16, (unsigned char)c)) +#define vec_uchar16_c(v) ((vec_uchar16)(v)) +#define vec_uchar16_z vec_uchar16_sp(0) + +typedef __vector signed char vec_char16; +#define vec_char16_set(...) (vec_char16){__VA_ARGS__} +#define vec_char16_sp(c) (__VSX_S16__(vec_char16, (signed char)c)) +#define vec_char16_c(v) ((vec_char16)(v)) +#define vec_char16_z vec_char16_sp(0) + +typedef __vector unsigned short vec_ushort8; +#define vec_ushort8_set(...) (vec_ushort8){__VA_ARGS__} +#define vec_ushort8_sp(c) (__VSX_S8__(vec_ushort8, (unsigned short)c)) +#define vec_ushort8_c(v) ((vec_ushort8)(v)) +#define vec_ushort8_z vec_ushort8_sp(0) + +typedef __vector signed short vec_short8; +#define vec_short8_set(...) (vec_short8){__VA_ARGS__} +#define vec_short8_sp(c) (__VSX_S8__(vec_short8, (signed short)c)) +#define vec_short8_c(v) ((vec_short8)(v)) +#define vec_short8_z vec_short8_sp(0) + +typedef __vector unsigned int vec_uint4; +#define vec_uint4_set(...) (vec_uint4){__VA_ARGS__} +#define vec_uint4_sp(c) (__VSX_S4__(vec_uint4, (unsigned int)c)) +#define vec_uint4_c(v) ((vec_uint4)(v)) +#define vec_uint4_z vec_uint4_sp(0) + +typedef __vector signed int vec_int4; +#define vec_int4_set(...) (vec_int4){__VA_ARGS__} +#define vec_int4_sp(c) (__VSX_S4__(vec_int4, (signed int)c)) +#define vec_int4_c(v) ((vec_int4)(v)) +#define vec_int4_z vec_int4_sp(0) + +typedef __vector float vec_float4; +#define vec_float4_set(...) (vec_float4){__VA_ARGS__} +#define vec_float4_sp(c) (__VSX_S4__(vec_float4, c)) +#define vec_float4_c(v) ((vec_float4)(v)) +#define vec_float4_z vec_float4_sp(0) + +typedef __vector unsigned long long vec_udword2; +#define vec_udword2_set(...) (vec_udword2){__VA_ARGS__} +#define vec_udword2_sp(c) (__VSX_S2__(vec_udword2, (unsigned long long)c)) +#define vec_udword2_c(v) ((vec_udword2)(v)) +#define vec_udword2_z vec_udword2_sp(0) + +typedef __vector signed long long vec_dword2; +#define vec_dword2_set(...) (vec_dword2){__VA_ARGS__} +#define vec_dword2_sp(c) (__VSX_S2__(vec_dword2, (signed long long)c)) +#define vec_dword2_c(v) ((vec_dword2)(v)) +#define vec_dword2_z vec_dword2_sp(0) + +typedef __vector double vec_double2; +#define vec_double2_set(...) (vec_double2){__VA_ARGS__} +#define vec_double2_c(v) ((vec_double2)(v)) +#define vec_double2_sp(c) (__VSX_S2__(vec_double2, c)) +#define vec_double2_z vec_double2_sp(0) + +#define vec_bchar16 __vector __bool char +#define vec_bchar16_set(...) (vec_bchar16){__VA_ARGS__} +#define vec_bchar16_c(v) ((vec_bchar16)(v)) + +#define vec_bshort8 __vector __bool short +#define vec_bshort8_set(...) (vec_bshort8){__VA_ARGS__} +#define vec_bshort8_c(v) ((vec_bshort8)(v)) + +#define vec_bint4 __vector __bool int +#define vec_bint4_set(...) (vec_bint4){__VA_ARGS__} +#define vec_bint4_c(v) ((vec_bint4)(v)) + +#define vec_bdword2 __vector __bool long long +#define vec_bdword2_set(...) (vec_bdword2){__VA_ARGS__} +#define vec_bdword2_c(v) ((vec_bdword2)(v)) + +#define VSX_FINLINE(tp) extern inline tp __attribute__((always_inline)) + +#define VSX_REDIRECT_1RG(rt, rg, fnm, fn2) \ +VSX_FINLINE(rt) fnm(const rg& a) { return fn2(a); } + +#define VSX_REDIRECT_2RG(rt, rg, fnm, fn2) \ +VSX_FINLINE(rt) fnm(const rg& a, const rg& b) { return fn2(a, b); } + +/* + * GCC VSX compatibility +**/ +#if defined(__GNUG__) && !defined(__clang__) + +// inline asm helper +#define VSX_IMPL_1RG(rt, rg, opc, fnm) \ +VSX_FINLINE(rt) fnm(const rg& a) \ +{ rt rs; __asm__ __volatile__(#opc" %x0,%x1" : "=wa" (rs) : "wa" (a)); return rs; } + +#define VSX_IMPL_1VRG(rt, rg, opc, fnm) \ +VSX_FINLINE(rt) fnm(const rg& a) \ +{ rt rs; __asm__ __volatile__(#opc" %0,%1" : "=v" (rs) : "v" (a)); return rs; } + +#define VSX_IMPL_2VRG_F(rt, rg, fopc, fnm) \ +VSX_FINLINE(rt) fnm(const rg& a, const rg& b) \ +{ rt rs; __asm__ __volatile__(fopc : "=v" (rs) : "v" (a), "v" (b)); return rs; } + +#define VSX_IMPL_2VRG(rt, rg, opc, fnm) VSX_IMPL_2VRG_F(rt, rg, #opc" %0,%1,%2", fnm) + +#if __GNUG__ < 8 + + // Support for int4 -> dword2 expanding multiply was added in GCC 8. + #ifdef vec_mule + #undef vec_mule + #endif + #ifdef vec_mulo + #undef vec_mulo + #endif + + VSX_REDIRECT_2RG(vec_ushort8, vec_uchar16, vec_mule, __builtin_vec_mule) + VSX_REDIRECT_2RG(vec_short8, vec_char16, vec_mule, __builtin_vec_mule) + VSX_REDIRECT_2RG(vec_int4, vec_short8, vec_mule, __builtin_vec_mule) + VSX_REDIRECT_2RG(vec_uint4, vec_ushort8, vec_mule, __builtin_vec_mule) + VSX_REDIRECT_2RG(vec_ushort8, vec_uchar16, vec_mulo, __builtin_vec_mulo) + VSX_REDIRECT_2RG(vec_short8, vec_char16, vec_mulo, __builtin_vec_mulo) + VSX_REDIRECT_2RG(vec_int4, vec_short8, vec_mulo, __builtin_vec_mulo) + VSX_REDIRECT_2RG(vec_uint4, vec_ushort8, vec_mulo, __builtin_vec_mulo) + + // dword2 support arrived in ISA 2.07 and GCC 8+ + VSX_IMPL_2VRG(vec_dword2, vec_int4, vmulosw, vec_mule) + VSX_IMPL_2VRG(vec_udword2, vec_uint4, vmulouw, vec_mule) + VSX_IMPL_2VRG(vec_dword2, vec_int4, vmulesw, vec_mulo) + VSX_IMPL_2VRG(vec_udword2, vec_uint4, vmuleuw, vec_mulo) + +#endif + +#if __GNUG__ < 7 +// up to GCC 6 vec_mul only supports precisions and llong +# ifdef vec_mul +# undef vec_mul +# endif +/* + * there's no a direct instruction for supporting 8-bit, 16-bit multiplication in ISA 2.07, + * XLC Implement it by using instruction "multiply even", "multiply odd" and "permute" +**/ +# define VSX_IMPL_MULH(Tvec, cperm) \ + VSX_FINLINE(Tvec) vec_mul(const Tvec& a, const Tvec& b) \ + { \ + static const vec_uchar16 ev_od = {cperm}; \ + return vec_perm((Tvec)vec_mule(a, b), (Tvec)vec_mulo(a, b), ev_od); \ + } + #define VSX_IMPL_MULH_P16 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 + VSX_IMPL_MULH(vec_char16, VSX_IMPL_MULH_P16) + VSX_IMPL_MULH(vec_uchar16, VSX_IMPL_MULH_P16) + #define VSX_IMPL_MULH_P8 0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29 + VSX_IMPL_MULH(vec_short8, VSX_IMPL_MULH_P8) + VSX_IMPL_MULH(vec_ushort8, VSX_IMPL_MULH_P8) + // vmuluwm can be used for unsigned or signed integers, that's what they said + VSX_IMPL_2VRG(vec_int4, vec_int4, vmuluwm, vec_mul) + VSX_IMPL_2VRG(vec_uint4, vec_uint4, vmuluwm, vec_mul) + // redirect to GCC builtin vec_mul, since it already supports precisions and llong + VSX_REDIRECT_2RG(vec_float4, vec_float4, vec_mul, __builtin_vec_mul) + VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mul, __builtin_vec_mul) + VSX_REDIRECT_2RG(vec_dword2, vec_dword2, vec_mul, __builtin_vec_mul) + VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mul, __builtin_vec_mul) +#endif // __GNUG__ < 7 + +#if __GNUG__ < 6 +/* + * Instruction "compare greater than or equal" in ISA 2.07 only supports single + * and double precision. + * In XLC and new versions of GCC implement integers by using instruction "greater than" and NOR. +**/ +# ifdef vec_cmpge +# undef vec_cmpge +# endif +# ifdef vec_cmple +# undef vec_cmple +# endif +# define vec_cmple(a, b) vec_cmpge(b, a) +# define VSX_IMPL_CMPGE(rt, rg, opc, fnm) \ + VSX_IMPL_2VRG_F(rt, rg, #opc" %0,%2,%1\n\t xxlnor %x0,%x0,%x0", fnm) + + VSX_IMPL_CMPGE(vec_bchar16, vec_char16, vcmpgtsb, vec_cmpge) + VSX_IMPL_CMPGE(vec_bchar16, vec_uchar16, vcmpgtub, vec_cmpge) + VSX_IMPL_CMPGE(vec_bshort8, vec_short8, vcmpgtsh, vec_cmpge) + VSX_IMPL_CMPGE(vec_bshort8, vec_ushort8, vcmpgtuh, vec_cmpge) + VSX_IMPL_CMPGE(vec_bint4, vec_int4, vcmpgtsw, vec_cmpge) + VSX_IMPL_CMPGE(vec_bint4, vec_uint4, vcmpgtuw, vec_cmpge) + VSX_IMPL_CMPGE(vec_bdword2, vec_dword2, vcmpgtsd, vec_cmpge) + VSX_IMPL_CMPGE(vec_bdword2, vec_udword2, vcmpgtud, vec_cmpge) + +// redirect to GCC builtin cmpge, since it already supports precisions + VSX_REDIRECT_2RG(vec_bint4, vec_float4, vec_cmpge, __builtin_vec_cmpge) + VSX_REDIRECT_2RG(vec_bdword2, vec_double2, vec_cmpge, __builtin_vec_cmpge) + +// up to gcc5 vec_nor doesn't support bool long long +# undef vec_nor + template + VSX_REDIRECT_2RG(T, T, vec_nor, __builtin_vec_nor) + + VSX_FINLINE(vec_bdword2) vec_nor(const vec_bdword2& a, const vec_bdword2& b) + { return vec_bdword2_c(__builtin_vec_nor(vec_dword2_c(a), vec_dword2_c(b))); } + +// vec_packs doesn't support double words in gcc4 and old versions of gcc5 +# undef vec_packs + VSX_REDIRECT_2RG(vec_char16, vec_short8, vec_packs, __builtin_vec_packs) + VSX_REDIRECT_2RG(vec_uchar16, vec_ushort8, vec_packs, __builtin_vec_packs) + VSX_REDIRECT_2RG(vec_short8, vec_int4, vec_packs, __builtin_vec_packs) + VSX_REDIRECT_2RG(vec_ushort8, vec_uint4, vec_packs, __builtin_vec_packs) + + VSX_IMPL_2VRG_F(vec_int4, vec_dword2, "vpksdss %0,%2,%1", vec_packs) + VSX_IMPL_2VRG_F(vec_uint4, vec_udword2, "vpkudus %0,%2,%1", vec_packs) +#endif // __GNUG__ < 6 + +#if __GNUG__ < 5 +// vec_xxpermdi in gcc4 missing little-endian supports just like clang +# define vec_permi(a, b, c) vec_xxpermdi(b, a, (3 ^ (((c) & 1) << 1 | (c) >> 1))) +// same as vec_xxpermdi +# undef vec_vbpermq + VSX_IMPL_2VRG(vec_udword2, vec_uchar16, vbpermq, vec_vbpermq) + VSX_IMPL_2VRG(vec_dword2, vec_char16, vbpermq, vec_vbpermq) +#else +# define vec_permi vec_xxpermdi +#endif // __GNUG__ < 5 + +// shift left double by word immediate +#ifndef vec_sldw +# define vec_sldw __builtin_vsx_xxsldwi +#endif + +// vector population count +VSX_IMPL_1VRG(vec_uchar16, vec_uchar16, vpopcntb, vec_popcntu) +VSX_IMPL_1VRG(vec_uchar16, vec_char16, vpopcntb, vec_popcntu) +VSX_IMPL_1VRG(vec_ushort8, vec_ushort8, vpopcnth, vec_popcntu) +VSX_IMPL_1VRG(vec_ushort8, vec_short8, vpopcnth, vec_popcntu) +VSX_IMPL_1VRG(vec_uint4, vec_uint4, vpopcntw, vec_popcntu) +VSX_IMPL_1VRG(vec_uint4, vec_int4, vpopcntw, vec_popcntu) +VSX_IMPL_1VRG(vec_udword2, vec_udword2, vpopcntd, vec_popcntu) +VSX_IMPL_1VRG(vec_udword2, vec_dword2, vpopcntd, vec_popcntu) + +// converts between single and double-precision +VSX_REDIRECT_1RG(vec_float4, vec_double2, vec_cvfo, __builtin_vsx_xvcvdpsp) +VSX_REDIRECT_1RG(vec_double2, vec_float4, vec_cvfo, __builtin_vsx_xvcvspdp) + +// converts word and doubleword to double-precision +#undef vec_ctd +VSX_IMPL_1RG(vec_double2, vec_int4, xvcvsxwdp, vec_ctdo) +VSX_IMPL_1RG(vec_double2, vec_uint4, xvcvuxwdp, vec_ctdo) +VSX_IMPL_1RG(vec_double2, vec_dword2, xvcvsxddp, vec_ctd) +VSX_IMPL_1RG(vec_double2, vec_udword2, xvcvuxddp, vec_ctd) + +// converts word and doubleword to single-precision +#undef vec_ctf +VSX_IMPL_1RG(vec_float4, vec_int4, xvcvsxwsp, vec_ctf) +VSX_IMPL_1RG(vec_float4, vec_uint4, xvcvuxwsp, vec_ctf) +VSX_IMPL_1RG(vec_float4, vec_dword2, xvcvsxdsp, vec_ctfo) +VSX_IMPL_1RG(vec_float4, vec_udword2, xvcvuxdsp, vec_ctfo) + +// converts single and double precision to signed word +#undef vec_cts +VSX_IMPL_1RG(vec_int4, vec_double2, xvcvdpsxws, vec_ctso) +VSX_IMPL_1RG(vec_int4, vec_float4, xvcvspsxws, vec_cts) + +// converts single and double precision to unsigned word +#undef vec_ctu +VSX_IMPL_1RG(vec_uint4, vec_double2, xvcvdpuxws, vec_ctuo) +VSX_IMPL_1RG(vec_uint4, vec_float4, xvcvspuxws, vec_ctu) + +// converts single and double precision to signed doubleword +#undef vec_ctsl +VSX_IMPL_1RG(vec_dword2, vec_double2, xvcvdpsxds, vec_ctsl) +VSX_IMPL_1RG(vec_dword2, vec_float4, xvcvspsxds, vec_ctslo) + +// converts single and double precision to unsigned doubleword +#undef vec_ctul +VSX_IMPL_1RG(vec_udword2, vec_double2, xvcvdpuxds, vec_ctul) +VSX_IMPL_1RG(vec_udword2, vec_float4, xvcvspuxds, vec_ctulo) + +// just in case if GCC doesn't define it +#ifndef vec_xl +# define vec_xl vec_vsx_ld +# define vec_xst vec_vsx_st +#endif + +#endif // GCC VSX compatibility + +/* + * CLANG VSX compatibility +**/ +#if defined(__clang__) && !defined(__IBMCPP__) + +/* + * CLANG doesn't support %x in the inline asm template which fixes register number + * when using any of the register constraints wa, wd, wf + * + * For more explanation checkout PowerPC and IBM RS6000 in https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html + * Also there's already an open bug https://bugs.llvm.org/show_bug.cgi?id=31837 + * + * So we're not able to use inline asm and only use built-in functions that CLANG supports + * and use __builtin_convertvector if clang missing any of vector conversions built-in functions + * + * todo: clang asm template bug is fixed, need to reconsider the current workarounds. +*/ + +// convert vector helper +#define VSX_IMPL_CONVERT(rt, rg, fnm) \ +VSX_FINLINE(rt) fnm(const rg& a) { return __builtin_convertvector(a, rt); } + +#if __clang_major__ < 5 +// implement vec_permi in a dirty way +# define VSX_IMPL_CLANG_4_PERMI(Tvec) \ + VSX_FINLINE(Tvec) vec_permi(const Tvec& a, const Tvec& b, unsigned const char c) \ + { \ + switch (c) \ + { \ + case 0: \ + return vec_mergeh(a, b); \ + case 1: \ + return vec_mergel(vec_mergeh(a, a), b); \ + case 2: \ + return vec_mergeh(vec_mergel(a, a), b); \ + default: \ + return vec_mergel(a, b); \ + } \ + } + VSX_IMPL_CLANG_4_PERMI(vec_udword2) + VSX_IMPL_CLANG_4_PERMI(vec_dword2) + VSX_IMPL_CLANG_4_PERMI(vec_double2) + +// vec_xxsldwi is missing in clang 4 +# define vec_xxsldwi(a, b, c) vec_sld(a, b, (c) * 4) +#else +// vec_xxpermdi is missing little-endian supports in clang 4 just like gcc4 +# define vec_permi(a, b, c) vec_xxpermdi(b, a, (3 ^ (((c) & 1) << 1 | (c) >> 1))) +#endif // __clang_major__ < 5 + +// shift left double by word immediate +#ifndef vec_sldw +# define vec_sldw vec_xxsldwi +#endif + +// Implement vec_rsqrt since clang only supports vec_rsqrte +#ifndef vec_rsqrt + VSX_FINLINE(vec_float4) vec_rsqrt(const vec_float4& a) + { return vec_div(vec_float4_sp(1), vec_sqrt(a)); } + + VSX_FINLINE(vec_double2) vec_rsqrt(const vec_double2& a) + { return vec_div(vec_double2_sp(1), vec_sqrt(a)); } +#endif + +// vec_promote missing support for doubleword +VSX_FINLINE(vec_dword2) vec_promote(long long a, int b) +{ + vec_dword2 ret = vec_dword2_z; + ret[b & 1] = a; + return ret; +} + +VSX_FINLINE(vec_udword2) vec_promote(unsigned long long a, int b) +{ + vec_udword2 ret = vec_udword2_z; + ret[b & 1] = a; + return ret; +} + +// vec_popcnt should return unsigned but clang has different thought just like gcc in vec_vpopcnt +#define VSX_IMPL_POPCNTU(Tvec, Tvec2, ucast) \ +VSX_FINLINE(Tvec) vec_popcntu(const Tvec2& a) \ +{ return ucast(vec_popcnt(a)); } +VSX_IMPL_POPCNTU(vec_uchar16, vec_char16, vec_uchar16_c); +VSX_IMPL_POPCNTU(vec_ushort8, vec_short8, vec_ushort8_c); +VSX_IMPL_POPCNTU(vec_uint4, vec_int4, vec_uint4_c); +VSX_IMPL_POPCNTU(vec_udword2, vec_dword2, vec_udword2_c); +// redirect unsigned types +VSX_REDIRECT_1RG(vec_uchar16, vec_uchar16, vec_popcntu, vec_popcnt) +VSX_REDIRECT_1RG(vec_ushort8, vec_ushort8, vec_popcntu, vec_popcnt) +VSX_REDIRECT_1RG(vec_uint4, vec_uint4, vec_popcntu, vec_popcnt) +VSX_REDIRECT_1RG(vec_udword2, vec_udword2, vec_popcntu, vec_popcnt) + +// converts between single and double precision +VSX_REDIRECT_1RG(vec_float4, vec_double2, vec_cvfo, __builtin_vsx_xvcvdpsp) +VSX_REDIRECT_1RG(vec_double2, vec_float4, vec_cvfo, __builtin_vsx_xvcvspdp) + +// converts word and doubleword to double-precision +#ifdef vec_ctd +# undef vec_ctd +#endif +VSX_REDIRECT_1RG(vec_double2, vec_int4, vec_ctdo, __builtin_vsx_xvcvsxwdp) +VSX_REDIRECT_1RG(vec_double2, vec_uint4, vec_ctdo, __builtin_vsx_xvcvuxwdp) + +VSX_IMPL_CONVERT(vec_double2, vec_dword2, vec_ctd) +VSX_IMPL_CONVERT(vec_double2, vec_udword2, vec_ctd) + +// converts word and doubleword to single-precision +#if __clang_major__ > 4 +# undef vec_ctf +#endif +VSX_IMPL_CONVERT(vec_float4, vec_int4, vec_ctf) +VSX_IMPL_CONVERT(vec_float4, vec_uint4, vec_ctf) +VSX_REDIRECT_1RG(vec_float4, vec_dword2, vec_ctfo, __builtin_vsx_xvcvsxdsp) +VSX_REDIRECT_1RG(vec_float4, vec_udword2, vec_ctfo, __builtin_vsx_xvcvuxdsp) + +// converts single and double precision to signed word +#if __clang_major__ > 4 +# undef vec_cts +#endif +VSX_REDIRECT_1RG(vec_int4, vec_double2, vec_ctso, __builtin_vsx_xvcvdpsxws) +VSX_IMPL_CONVERT(vec_int4, vec_float4, vec_cts) + +// converts single and double precision to unsigned word +#if __clang_major__ > 4 +# undef vec_ctu +#endif +VSX_REDIRECT_1RG(vec_uint4, vec_double2, vec_ctuo, __builtin_vsx_xvcvdpuxws) +VSX_IMPL_CONVERT(vec_uint4, vec_float4, vec_ctu) + +// converts single and double precision to signed doubleword +#ifdef vec_ctsl +# undef vec_ctsl +#endif +VSX_IMPL_CONVERT(vec_dword2, vec_double2, vec_ctsl) +// __builtin_convertvector unable to convert, xvcvspsxds is missing on it +VSX_FINLINE(vec_dword2) vec_ctslo(const vec_float4& a) +{ return vec_ctsl(vec_cvfo(a)); } + +// converts single and double precision to unsigned doubleword +#ifdef vec_ctul +# undef vec_ctul +#endif +VSX_IMPL_CONVERT(vec_udword2, vec_double2, vec_ctul) +// __builtin_convertvector unable to convert, xvcvspuxds is missing on it +VSX_FINLINE(vec_udword2) vec_ctulo(const vec_float4& a) +{ return vec_ctul(vec_cvfo(a)); } + +#endif // CLANG VSX compatibility + +/* + * Common GCC, CLANG compatibility +**/ +#if defined(__GNUG__) && !defined(__IBMCPP__) + +#ifdef vec_cvf +# undef vec_cvf +#endif + +#define VSX_IMPL_CONV_EVEN_4_2(rt, rg, fnm, fn2) \ +VSX_FINLINE(rt) fnm(const rg& a) \ +{ return fn2(vec_sldw(a, a, 1)); } + +VSX_IMPL_CONV_EVEN_4_2(vec_double2, vec_float4, vec_cvf, vec_cvfo) +VSX_IMPL_CONV_EVEN_4_2(vec_double2, vec_int4, vec_ctd, vec_ctdo) +VSX_IMPL_CONV_EVEN_4_2(vec_double2, vec_uint4, vec_ctd, vec_ctdo) + +VSX_IMPL_CONV_EVEN_4_2(vec_dword2, vec_float4, vec_ctsl, vec_ctslo) +VSX_IMPL_CONV_EVEN_4_2(vec_udword2, vec_float4, vec_ctul, vec_ctulo) + +#define VSX_IMPL_CONV_EVEN_2_4(rt, rg, fnm, fn2) \ +VSX_FINLINE(rt) fnm(const rg& a) \ +{ \ + rt v4 = fn2(a); \ + return vec_sldw(v4, v4, 3); \ +} + +VSX_IMPL_CONV_EVEN_2_4(vec_float4, vec_double2, vec_cvf, vec_cvfo) +VSX_IMPL_CONV_EVEN_2_4(vec_float4, vec_dword2, vec_ctf, vec_ctfo) +VSX_IMPL_CONV_EVEN_2_4(vec_float4, vec_udword2, vec_ctf, vec_ctfo) + +VSX_IMPL_CONV_EVEN_2_4(vec_int4, vec_double2, vec_cts, vec_ctso) +VSX_IMPL_CONV_EVEN_2_4(vec_uint4, vec_double2, vec_ctu, vec_ctuo) + +// Only for Eigen! +/* + * changing behavior of conversion intrinsics for gcc has effect on Eigen + * so we redefine old behavior again only on gcc, clang +*/ +#if !defined(__clang__) || __clang_major__ > 4 + // ignoring second arg since Eigen only truncates toward zero +# define VSX_IMPL_CONV_2VARIANT(rt, rg, fnm, fn2) \ + VSX_FINLINE(rt) fnm(const rg& a, int only_truncate) \ + { \ + assert(only_truncate == 0); \ + CV_UNUSED(only_truncate); \ + return fn2(a); \ + } + VSX_IMPL_CONV_2VARIANT(vec_int4, vec_float4, vec_cts, vec_cts) + VSX_IMPL_CONV_2VARIANT(vec_float4, vec_int4, vec_ctf, vec_ctf) + // define vec_cts for converting double precision to signed doubleword + // which isn't combitable with xlc but its okay since Eigen only use it for gcc + VSX_IMPL_CONV_2VARIANT(vec_dword2, vec_double2, vec_cts, vec_ctsl) +#endif // Eigen + +#endif // Common GCC, CLANG compatibility + +/* + * XLC VSX compatibility +**/ +#if defined(__IBMCPP__) + +// vector population count +#define vec_popcntu vec_popcnt + +// overload and redirect with setting second arg to zero +// since we only support conversions without the second arg +#define VSX_IMPL_OVERLOAD_Z2(rt, rg, fnm) \ +VSX_FINLINE(rt) fnm(const rg& a) { return fnm(a, 0); } + +VSX_IMPL_OVERLOAD_Z2(vec_double2, vec_int4, vec_ctd) +VSX_IMPL_OVERLOAD_Z2(vec_double2, vec_uint4, vec_ctd) +VSX_IMPL_OVERLOAD_Z2(vec_double2, vec_dword2, vec_ctd) +VSX_IMPL_OVERLOAD_Z2(vec_double2, vec_udword2, vec_ctd) + +VSX_IMPL_OVERLOAD_Z2(vec_float4, vec_int4, vec_ctf) +VSX_IMPL_OVERLOAD_Z2(vec_float4, vec_uint4, vec_ctf) +VSX_IMPL_OVERLOAD_Z2(vec_float4, vec_dword2, vec_ctf) +VSX_IMPL_OVERLOAD_Z2(vec_float4, vec_udword2, vec_ctf) + +VSX_IMPL_OVERLOAD_Z2(vec_int4, vec_double2, vec_cts) +VSX_IMPL_OVERLOAD_Z2(vec_int4, vec_float4, vec_cts) + +VSX_IMPL_OVERLOAD_Z2(vec_uint4, vec_double2, vec_ctu) +VSX_IMPL_OVERLOAD_Z2(vec_uint4, vec_float4, vec_ctu) + +VSX_IMPL_OVERLOAD_Z2(vec_dword2, vec_double2, vec_ctsl) +VSX_IMPL_OVERLOAD_Z2(vec_dword2, vec_float4, vec_ctsl) + +VSX_IMPL_OVERLOAD_Z2(vec_udword2, vec_double2, vec_ctul) +VSX_IMPL_OVERLOAD_Z2(vec_udword2, vec_float4, vec_ctul) + +// fixme: implement conversions of odd-numbered elements in a dirty way +// since xlc doesn't support VSX registers operand in inline asm. +#define VSX_IMPL_CONV_ODD_4_2(rt, rg, fnm, fn2) \ +VSX_FINLINE(rt) fnm(const rg& a) { return fn2(vec_sldw(a, a, 3)); } + +VSX_IMPL_CONV_ODD_4_2(vec_double2, vec_float4, vec_cvfo, vec_cvf) +VSX_IMPL_CONV_ODD_4_2(vec_double2, vec_int4, vec_ctdo, vec_ctd) +VSX_IMPL_CONV_ODD_4_2(vec_double2, vec_uint4, vec_ctdo, vec_ctd) + +VSX_IMPL_CONV_ODD_4_2(vec_dword2, vec_float4, vec_ctslo, vec_ctsl) +VSX_IMPL_CONV_ODD_4_2(vec_udword2, vec_float4, vec_ctulo, vec_ctul) + +#define VSX_IMPL_CONV_ODD_2_4(rt, rg, fnm, fn2) \ +VSX_FINLINE(rt) fnm(const rg& a) \ +{ \ + rt v4 = fn2(a); \ + return vec_sldw(v4, v4, 1); \ +} + +VSX_IMPL_CONV_ODD_2_4(vec_float4, vec_double2, vec_cvfo, vec_cvf) +VSX_IMPL_CONV_ODD_2_4(vec_float4, vec_dword2, vec_ctfo, vec_ctf) +VSX_IMPL_CONV_ODD_2_4(vec_float4, vec_udword2, vec_ctfo, vec_ctf) + +VSX_IMPL_CONV_ODD_2_4(vec_int4, vec_double2, vec_ctso, vec_cts) +VSX_IMPL_CONV_ODD_2_4(vec_uint4, vec_double2, vec_ctuo, vec_ctu) + +#endif // XLC VSX compatibility + +// ignore GCC warning that caused by -Wunused-but-set-variable in rare cases +#if defined(__GNUG__) && !defined(__clang__) +# define VSX_UNUSED(Tvec) Tvec __attribute__((__unused__)) +#else // CLANG, XLC +# define VSX_UNUSED(Tvec) Tvec +#endif + +// gcc can find his way in casting log int and XLC, CLANG ambiguous +#if defined(__clang__) || defined(__IBMCPP__) + VSX_FINLINE(vec_udword2) vec_splats(uint64 v) + { return vec_splats((unsigned long long) v); } + + VSX_FINLINE(vec_dword2) vec_splats(int64 v) + { return vec_splats((long long) v); } + + VSX_FINLINE(vec_udword2) vec_promote(uint64 a, int b) + { return vec_promote((unsigned long long) a, b); } + + VSX_FINLINE(vec_dword2) vec_promote(int64 a, int b) + { return vec_promote((long long) a, b); } +#endif + +/* + * implement vsx_ld(offset, pointer), vsx_st(vector, offset, pointer) + * load and set using offset depend on the pointer type + * + * implement vsx_ldf(offset, pointer), vsx_stf(vector, offset, pointer) + * load and set using offset depend on fixed bytes size + * + * Note: In clang vec_xl and vec_xst fails to load unaligned addresses + * so we are using vec_vsx_ld, vec_vsx_st instead +*/ + +#if defined(__clang__) && !defined(__IBMCPP__) +# define vsx_ldf vec_vsx_ld +# define vsx_stf vec_vsx_st +#else // GCC , XLC +# define vsx_ldf vec_xl +# define vsx_stf vec_xst +#endif + +#define VSX_OFFSET(o, p) ((o) * sizeof(*(p))) +#define vsx_ld(o, p) vsx_ldf(VSX_OFFSET(o, p), p) +#define vsx_st(v, o, p) vsx_stf(v, VSX_OFFSET(o, p), p) + +/* + * implement vsx_ld2(offset, pointer), vsx_st2(vector, offset, pointer) to load and store double words + * In GCC vec_xl and vec_xst it maps to vec_vsx_ld, vec_vsx_st which doesn't support long long + * and in CLANG we are using vec_vsx_ld, vec_vsx_st because vec_xl, vec_xst fails to load unaligned addresses + * + * In XLC vec_xl and vec_xst fail to cast int64(long int) to long long +*/ +#if (defined(__GNUG__) || defined(__clang__)) && !defined(__IBMCPP__) + VSX_FINLINE(vec_udword2) vsx_ld2(long o, const uint64* p) + { return vec_udword2_c(vsx_ldf(VSX_OFFSET(o, p), (unsigned int*)p)); } + + VSX_FINLINE(vec_dword2) vsx_ld2(long o, const int64* p) + { return vec_dword2_c(vsx_ldf(VSX_OFFSET(o, p), (int*)p)); } + + VSX_FINLINE(void) vsx_st2(const vec_udword2& vec, long o, uint64* p) + { vsx_stf(vec_uint4_c(vec), VSX_OFFSET(o, p), (unsigned int*)p); } + + VSX_FINLINE(void) vsx_st2(const vec_dword2& vec, long o, int64* p) + { vsx_stf(vec_int4_c(vec), VSX_OFFSET(o, p), (int*)p); } +#else // XLC + VSX_FINLINE(vec_udword2) vsx_ld2(long o, const uint64* p) + { return vsx_ldf(VSX_OFFSET(o, p), (unsigned long long*)p); } + + VSX_FINLINE(vec_dword2) vsx_ld2(long o, const int64* p) + { return vsx_ldf(VSX_OFFSET(o, p), (long long*)p); } + + VSX_FINLINE(void) vsx_st2(const vec_udword2& vec, long o, uint64* p) + { vsx_stf(vec, VSX_OFFSET(o, p), (unsigned long long*)p); } + + VSX_FINLINE(void) vsx_st2(const vec_dword2& vec, long o, int64* p) + { vsx_stf(vec, VSX_OFFSET(o, p), (long long*)p); } +#endif + +// Store lower 8 byte +#define vec_st_l8(v, p) *((uint64*)(p)) = vec_extract(vec_udword2_c(v), 0) + +// Store higher 8 byte +#define vec_st_h8(v, p) *((uint64*)(p)) = vec_extract(vec_udword2_c(v), 1) + +// Load 64-bits of integer data to lower part +#define VSX_IMPL_LOAD_L8(Tvec, Tp) \ +VSX_FINLINE(Tvec) vec_ld_l8(const Tp *p) \ +{ return ((Tvec)vec_promote(*((uint64*)p), 0)); } + +VSX_IMPL_LOAD_L8(vec_uchar16, uchar) +VSX_IMPL_LOAD_L8(vec_char16, schar) +VSX_IMPL_LOAD_L8(vec_ushort8, ushort) +VSX_IMPL_LOAD_L8(vec_short8, short) +VSX_IMPL_LOAD_L8(vec_uint4, uint) +VSX_IMPL_LOAD_L8(vec_int4, int) +VSX_IMPL_LOAD_L8(vec_float4, float) +VSX_IMPL_LOAD_L8(vec_udword2, uint64) +VSX_IMPL_LOAD_L8(vec_dword2, int64) +VSX_IMPL_LOAD_L8(vec_double2, double) + +// logical not +#define vec_not(a) vec_nor(a, a) + +// power9 yaya +// not equal +#ifndef vec_cmpne +# define vec_cmpne(a, b) vec_not(vec_cmpeq(a, b)) +#endif + +// absolute difference +#ifndef vec_absd +# define vec_absd(a, b) vec_sub(vec_max(a, b), vec_min(a, b)) +#endif + +/* + * Implement vec_unpacklu and vec_unpackhu + * since vec_unpackl, vec_unpackh only support signed integers +**/ +#define VSX_IMPL_UNPACKU(rt, rg, zero) \ +VSX_FINLINE(rt) vec_unpacklu(const rg& a) \ +{ return (rt)(vec_mergel(a, zero)); } \ +VSX_FINLINE(rt) vec_unpackhu(const rg& a) \ +{ return (rt)(vec_mergeh(a, zero)); } + +VSX_IMPL_UNPACKU(vec_ushort8, vec_uchar16, vec_uchar16_z) +VSX_IMPL_UNPACKU(vec_uint4, vec_ushort8, vec_ushort8_z) +VSX_IMPL_UNPACKU(vec_udword2, vec_uint4, vec_uint4_z) + +/* + * Implement vec_mergesqe and vec_mergesqo + * Merges the sequence values of even and odd elements of two vectors +*/ +#define VSX_IMPL_PERM(rt, fnm, ...) \ +VSX_FINLINE(rt) fnm(const rt& a, const rt& b) \ +{ static const vec_uchar16 perm = {__VA_ARGS__}; return vec_perm(a, b, perm); } + +// 16 +#define perm16_mergesqe 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 +#define perm16_mergesqo 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 +VSX_IMPL_PERM(vec_uchar16, vec_mergesqe, perm16_mergesqe) +VSX_IMPL_PERM(vec_uchar16, vec_mergesqo, perm16_mergesqo) +VSX_IMPL_PERM(vec_char16, vec_mergesqe, perm16_mergesqe) +VSX_IMPL_PERM(vec_char16, vec_mergesqo, perm16_mergesqo) +// 8 +#define perm8_mergesqe 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 +#define perm8_mergesqo 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 +VSX_IMPL_PERM(vec_ushort8, vec_mergesqe, perm8_mergesqe) +VSX_IMPL_PERM(vec_ushort8, vec_mergesqo, perm8_mergesqo) +VSX_IMPL_PERM(vec_short8, vec_mergesqe, perm8_mergesqe) +VSX_IMPL_PERM(vec_short8, vec_mergesqo, perm8_mergesqo) +// 4 +#define perm4_mergesqe 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +#define perm4_mergesqo 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 +VSX_IMPL_PERM(vec_uint4, vec_mergesqe, perm4_mergesqe) +VSX_IMPL_PERM(vec_uint4, vec_mergesqo, perm4_mergesqo) +VSX_IMPL_PERM(vec_int4, vec_mergesqe, perm4_mergesqe) +VSX_IMPL_PERM(vec_int4, vec_mergesqo, perm4_mergesqo) +VSX_IMPL_PERM(vec_float4, vec_mergesqe, perm4_mergesqe) +VSX_IMPL_PERM(vec_float4, vec_mergesqo, perm4_mergesqo) +// 2 +VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mergesqe, vec_mergeh) +VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mergesqo, vec_mergel) +VSX_REDIRECT_2RG(vec_dword2, vec_dword2, vec_mergesqe, vec_mergeh) +VSX_REDIRECT_2RG(vec_dword2, vec_dword2, vec_mergesqo, vec_mergel) +VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mergesqe, vec_mergeh) +VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mergesqo, vec_mergel) + +/* + * Implement vec_mergesqh and vec_mergesql + * Merges the sequence most and least significant halves of two vectors +*/ +#define VSX_IMPL_MERGESQHL(Tvec) \ +VSX_FINLINE(Tvec) vec_mergesqh(const Tvec& a, const Tvec& b) \ +{ return (Tvec)vec_mergeh(vec_udword2_c(a), vec_udword2_c(b)); } \ +VSX_FINLINE(Tvec) vec_mergesql(const Tvec& a, const Tvec& b) \ +{ return (Tvec)vec_mergel(vec_udword2_c(a), vec_udword2_c(b)); } +VSX_IMPL_MERGESQHL(vec_uchar16) +VSX_IMPL_MERGESQHL(vec_char16) +VSX_IMPL_MERGESQHL(vec_ushort8) +VSX_IMPL_MERGESQHL(vec_short8) +VSX_IMPL_MERGESQHL(vec_uint4) +VSX_IMPL_MERGESQHL(vec_int4) +VSX_IMPL_MERGESQHL(vec_float4) +VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mergesqh, vec_mergeh) +VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mergesql, vec_mergel) +VSX_REDIRECT_2RG(vec_dword2, vec_dword2, vec_mergesqh, vec_mergeh) +VSX_REDIRECT_2RG(vec_dword2, vec_dword2, vec_mergesql, vec_mergel) +VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mergesqh, vec_mergeh) +VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mergesql, vec_mergel) + + +// 2 and 4 channels interleave for all types except 2 lanes +#define VSX_IMPL_ST_INTERLEAVE(Tp, Tvec) \ +VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, Tp* ptr) \ +{ \ + vsx_stf(vec_mergeh(a, b), 0, ptr); \ + vsx_stf(vec_mergel(a, b), 16, ptr); \ +} \ +VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, \ + const Tvec& c, const Tvec& d, Tp* ptr) \ +{ \ + Tvec ac = vec_mergeh(a, c); \ + Tvec bd = vec_mergeh(b, d); \ + vsx_stf(vec_mergeh(ac, bd), 0, ptr); \ + vsx_stf(vec_mergel(ac, bd), 16, ptr); \ + ac = vec_mergel(a, c); \ + bd = vec_mergel(b, d); \ + vsx_stf(vec_mergeh(ac, bd), 32, ptr); \ + vsx_stf(vec_mergel(ac, bd), 48, ptr); \ +} +VSX_IMPL_ST_INTERLEAVE(uchar, vec_uchar16) +VSX_IMPL_ST_INTERLEAVE(schar, vec_char16) +VSX_IMPL_ST_INTERLEAVE(ushort, vec_ushort8) +VSX_IMPL_ST_INTERLEAVE(short, vec_short8) +VSX_IMPL_ST_INTERLEAVE(uint, vec_uint4) +VSX_IMPL_ST_INTERLEAVE(int, vec_int4) +VSX_IMPL_ST_INTERLEAVE(float, vec_float4) + +// 2 and 4 channels deinterleave for 16 lanes +#define VSX_IMPL_ST_DINTERLEAVE_8(Tp, Tvec) \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b) \ +{ \ + Tvec v0 = vsx_ld(0, ptr); \ + Tvec v1 = vsx_ld(16, ptr); \ + a = vec_mergesqe(v0, v1); \ + b = vec_mergesqo(v0, v1); \ +} \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, \ + Tvec& c, Tvec& d) \ +{ \ + Tvec v0 = vsx_ld(0, ptr); \ + Tvec v1 = vsx_ld(16, ptr); \ + Tvec v2 = vsx_ld(32, ptr); \ + Tvec v3 = vsx_ld(48, ptr); \ + Tvec m0 = vec_mergesqe(v0, v1); \ + Tvec m1 = vec_mergesqe(v2, v3); \ + a = vec_mergesqe(m0, m1); \ + c = vec_mergesqo(m0, m1); \ + m0 = vec_mergesqo(v0, v1); \ + m1 = vec_mergesqo(v2, v3); \ + b = vec_mergesqe(m0, m1); \ + d = vec_mergesqo(m0, m1); \ +} +VSX_IMPL_ST_DINTERLEAVE_8(uchar, vec_uchar16) +VSX_IMPL_ST_DINTERLEAVE_8(schar, vec_char16) + +// 2 and 4 channels deinterleave for 8 lanes +#define VSX_IMPL_ST_DINTERLEAVE_16(Tp, Tvec) \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b) \ +{ \ + Tvec v0 = vsx_ld(0, ptr); \ + Tvec v1 = vsx_ld(8, ptr); \ + a = vec_mergesqe(v0, v1); \ + b = vec_mergesqo(v0, v1); \ +} \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, \ + Tvec& c, Tvec& d) \ +{ \ + Tvec v0 = vsx_ld(0, ptr); \ + Tvec v1 = vsx_ld(8, ptr); \ + Tvec m0 = vec_mergeh(v0, v1); \ + Tvec m1 = vec_mergel(v0, v1); \ + Tvec ab0 = vec_mergeh(m0, m1); \ + Tvec cd0 = vec_mergel(m0, m1); \ + v0 = vsx_ld(16, ptr); \ + v1 = vsx_ld(24, ptr); \ + m0 = vec_mergeh(v0, v1); \ + m1 = vec_mergel(v0, v1); \ + Tvec ab1 = vec_mergeh(m0, m1); \ + Tvec cd1 = vec_mergel(m0, m1); \ + a = vec_mergesqh(ab0, ab1); \ + b = vec_mergesql(ab0, ab1); \ + c = vec_mergesqh(cd0, cd1); \ + d = vec_mergesql(cd0, cd1); \ +} +VSX_IMPL_ST_DINTERLEAVE_16(ushort, vec_ushort8) +VSX_IMPL_ST_DINTERLEAVE_16(short, vec_short8) + +// 2 and 4 channels deinterleave for 4 lanes +#define VSX_IMPL_ST_DINTERLEAVE_32(Tp, Tvec) \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b) \ +{ \ + a = vsx_ld(0, ptr); \ + b = vsx_ld(4, ptr); \ + Tvec m0 = vec_mergeh(a, b); \ + Tvec m1 = vec_mergel(a, b); \ + a = vec_mergeh(m0, m1); \ + b = vec_mergel(m0, m1); \ +} \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, \ + Tvec& c, Tvec& d) \ +{ \ + Tvec v0 = vsx_ld(0, ptr); \ + Tvec v1 = vsx_ld(4, ptr); \ + Tvec v2 = vsx_ld(8, ptr); \ + Tvec v3 = vsx_ld(12, ptr); \ + Tvec m0 = vec_mergeh(v0, v2); \ + Tvec m1 = vec_mergeh(v1, v3); \ + a = vec_mergeh(m0, m1); \ + b = vec_mergel(m0, m1); \ + m0 = vec_mergel(v0, v2); \ + m1 = vec_mergel(v1, v3); \ + c = vec_mergeh(m0, m1); \ + d = vec_mergel(m0, m1); \ +} +VSX_IMPL_ST_DINTERLEAVE_32(uint, vec_uint4) +VSX_IMPL_ST_DINTERLEAVE_32(int, vec_int4) +VSX_IMPL_ST_DINTERLEAVE_32(float, vec_float4) + +// 2 and 4 channels interleave and deinterleave for 2 lanes +#define VSX_IMPL_ST_D_INTERLEAVE_64(Tp, Tvec, ld_func, st_func) \ +VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, Tp* ptr) \ +{ \ + st_func(vec_mergeh(a, b), 0, ptr); \ + st_func(vec_mergel(a, b), 2, ptr); \ +} \ +VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, \ + const Tvec& c, const Tvec& d, Tp* ptr) \ +{ \ + st_func(vec_mergeh(a, b), 0, ptr); \ + st_func(vec_mergeh(c, d), 2, ptr); \ + st_func(vec_mergel(a, b), 4, ptr); \ + st_func(vec_mergel(c, d), 6, ptr); \ +} \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b) \ +{ \ + Tvec m0 = ld_func(0, ptr); \ + Tvec m1 = ld_func(2, ptr); \ + a = vec_mergeh(m0, m1); \ + b = vec_mergel(m0, m1); \ +} \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, \ + Tvec& c, Tvec& d) \ +{ \ + Tvec v0 = ld_func(0, ptr); \ + Tvec v1 = ld_func(2, ptr); \ + Tvec v2 = ld_func(4, ptr); \ + Tvec v3 = ld_func(6, ptr); \ + a = vec_mergeh(v0, v2); \ + b = vec_mergel(v0, v2); \ + c = vec_mergeh(v1, v3); \ + d = vec_mergel(v1, v3); \ +} +VSX_IMPL_ST_D_INTERLEAVE_64(int64, vec_dword2, vsx_ld2, vsx_st2) +VSX_IMPL_ST_D_INTERLEAVE_64(uint64, vec_udword2, vsx_ld2, vsx_st2) +VSX_IMPL_ST_D_INTERLEAVE_64(double, vec_double2, vsx_ld, vsx_st) + +/* 3 channels */ +#define VSX_IMPL_ST_INTERLEAVE_3CH_16(Tp, Tvec) \ +VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, \ + const Tvec& c, Tp* ptr) \ +{ \ + static const vec_uchar16 a12 = {0, 16, 0, 1, 17, 0, 2, 18, 0, 3, 19, 0, 4, 20, 0, 5}; \ + static const vec_uchar16 a123 = {0, 1, 16, 3, 4, 17, 6, 7, 18, 9, 10, 19, 12, 13, 20, 15}; \ + vsx_st(vec_perm(vec_perm(a, b, a12), c, a123), 0, ptr); \ + static const vec_uchar16 b12 = {21, 0, 6, 22, 0, 7, 23, 0, 8, 24, 0, 9, 25, 0, 10, 26}; \ + static const vec_uchar16 b123 = {0, 21, 2, 3, 22, 5, 6, 23, 8, 9, 24, 11, 12, 25, 14, 15}; \ + vsx_st(vec_perm(vec_perm(a, b, b12), c, b123), 16, ptr); \ + static const vec_uchar16 c12 = {0, 11, 27, 0, 12, 28, 0, 13, 29, 0, 14, 30, 0, 15, 31, 0}; \ + static const vec_uchar16 c123 = {26, 1, 2, 27, 4, 5, 28, 7, 8, 29, 10, 11, 30, 13, 14, 31}; \ + vsx_st(vec_perm(vec_perm(a, b, c12), c, c123), 32, ptr); \ +} \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, Tvec& c) \ +{ \ + Tvec v1 = vsx_ld(0, ptr); \ + Tvec v2 = vsx_ld(16, ptr); \ + Tvec v3 = vsx_ld(32, ptr); \ + static const vec_uchar16 a12_perm = {0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 0, 0, 0, 0, 0}; \ + static const vec_uchar16 a123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 17, 20, 23, 26, 29}; \ + a = vec_perm(vec_perm(v1, v2, a12_perm), v3, a123_perm); \ + static const vec_uchar16 b12_perm = {1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 0, 0, 0, 0, 0}; \ + static const vec_uchar16 b123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 18, 21, 24, 27, 30}; \ + b = vec_perm(vec_perm(v1, v2, b12_perm), v3, b123_perm); \ + static const vec_uchar16 c12_perm = {2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 0, 0, 0, 0, 0, 0}; \ + static const vec_uchar16 c123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 19, 22, 25, 28, 31}; \ + c = vec_perm(vec_perm(v1, v2, c12_perm), v3, c123_perm); \ +} +VSX_IMPL_ST_INTERLEAVE_3CH_16(uchar, vec_uchar16) +VSX_IMPL_ST_INTERLEAVE_3CH_16(schar, vec_char16) + +#define VSX_IMPL_ST_INTERLEAVE_3CH_8(Tp, Tvec) \ +VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, \ + const Tvec& c, Tp* ptr) \ +{ \ + static const vec_uchar16 a12 = {0, 1, 16, 17, 0, 0, 2, 3, 18, 19, 0, 0, 4, 5, 20, 21}; \ + static const vec_uchar16 a123 = {0, 1, 2, 3, 16, 17, 6, 7, 8, 9, 18, 19, 12, 13, 14, 15}; \ + vsx_st(vec_perm(vec_perm(a, b, a12), c, a123), 0, ptr); \ + static const vec_uchar16 b12 = {0, 0, 6, 7, 22, 23, 0, 0, 8, 9, 24, 25, 0, 0, 10, 11}; \ + static const vec_uchar16 b123 = {20, 21, 2, 3, 4, 5, 22, 23, 8, 9, 10, 11, 24, 25, 14, 15}; \ + vsx_st(vec_perm(vec_perm(a, b, b12), c, b123), 8, ptr); \ + static const vec_uchar16 c12 = {26, 27, 0, 0, 12, 13, 28, 29, 0, 0, 14, 15, 30, 31, 0, 0}; \ + static const vec_uchar16 c123 = {0, 1, 26, 27, 4, 5, 6, 7, 28, 29, 10, 11, 12, 13, 30, 31}; \ + vsx_st(vec_perm(vec_perm(a, b, c12), c, c123), 16, ptr); \ +} \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, Tvec& c) \ +{ \ + Tvec v1 = vsx_ld(0, ptr); \ + Tvec v2 = vsx_ld(8, ptr); \ + Tvec v3 = vsx_ld(16, ptr); \ + static const vec_uchar16 a12_perm = {0, 1, 6, 7, 12, 13, 18, 19, 24, 25, 30, 31, 0, 0, 0, 0}; \ + static const vec_uchar16 a123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 20, 21, 26, 27}; \ + a = vec_perm(vec_perm(v1, v2, a12_perm), v3, a123_perm); \ + static const vec_uchar16 b12_perm = {2, 3, 8, 9, 14, 15, 20, 21, 26, 27, 0, 0, 0, 0, 0, 0}; \ + static const vec_uchar16 b123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 17, 22, 23, 28, 29}; \ + b = vec_perm(vec_perm(v1, v2, b12_perm), v3, b123_perm); \ + static const vec_uchar16 c12_perm = {4, 5, 10, 11, 16, 17, 22, 23, 28, 29, 0, 0, 0, 0, 0, 0}; \ + static const vec_uchar16 c123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 18, 19, 24, 25, 30, 31}; \ + c = vec_perm(vec_perm(v1, v2, c12_perm), v3, c123_perm); \ +} +VSX_IMPL_ST_INTERLEAVE_3CH_8(ushort, vec_ushort8) +VSX_IMPL_ST_INTERLEAVE_3CH_8(short, vec_short8) + +#define VSX_IMPL_ST_INTERLEAVE_3CH_4(Tp, Tvec) \ +VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, \ + const Tvec& c, Tp* ptr) \ +{ \ + Tvec hbc = vec_mergeh(b, c); \ + static const vec_uchar16 ahbc = {0, 1, 2, 3, 16, 17, 18, 19, 20, 21, 22, 23, 4, 5, 6, 7}; \ + vsx_st(vec_perm(a, hbc, ahbc), 0, ptr); \ + Tvec lab = vec_mergel(a, b); \ + vsx_st(vec_sld(lab, hbc, 8), 4, ptr); \ + static const vec_uchar16 clab = {8, 9, 10, 11, 24, 25, 26, 27, 28, 29, 30, 31, 12, 13, 14, 15};\ + vsx_st(vec_perm(c, lab, clab), 8, ptr); \ +} \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, Tvec& c) \ +{ \ + Tvec v1 = vsx_ld(0, ptr); \ + Tvec v2 = vsx_ld(4, ptr); \ + Tvec v3 = vsx_ld(8, ptr); \ + static const vec_uchar16 flp = {0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19, 28, 29, 30, 31}; \ + a = vec_perm(v1, vec_sld(v3, v2, 8), flp); \ + static const vec_uchar16 flp2 = {28, 29, 30, 31, 0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19}; \ + b = vec_perm(v2, vec_sld(v1, v3, 8), flp2); \ + c = vec_perm(vec_sld(v2, v1, 8), v3, flp); \ +} +VSX_IMPL_ST_INTERLEAVE_3CH_4(uint, vec_uint4) +VSX_IMPL_ST_INTERLEAVE_3CH_4(int, vec_int4) +VSX_IMPL_ST_INTERLEAVE_3CH_4(float, vec_float4) + +#define VSX_IMPL_ST_INTERLEAVE_3CH_2(Tp, Tvec, ld_func, st_func) \ +VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, \ + const Tvec& c, Tp* ptr) \ +{ \ + st_func(vec_mergeh(a, b), 0, ptr); \ + st_func(vec_permi(c, a, 1), 2, ptr); \ + st_func(vec_mergel(b, c), 4, ptr); \ +} \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, \ + Tvec& b, Tvec& c) \ +{ \ + Tvec v1 = ld_func(0, ptr); \ + Tvec v2 = ld_func(2, ptr); \ + Tvec v3 = ld_func(4, ptr); \ + a = vec_permi(v1, v2, 1); \ + b = vec_permi(v1, v3, 2); \ + c = vec_permi(v2, v3, 1); \ +} +VSX_IMPL_ST_INTERLEAVE_3CH_2(int64, vec_dword2, vsx_ld2, vsx_st2) +VSX_IMPL_ST_INTERLEAVE_3CH_2(uint64, vec_udword2, vsx_ld2, vsx_st2) +VSX_IMPL_ST_INTERLEAVE_3CH_2(double, vec_double2, vsx_ld, vsx_st) + +#endif // CV_VSX + +//! @} + +#endif // OPENCV_HAL_VSX_UTILS_HPP diff --git a/IPL/include/opencv/opencv2/core/wimage.hpp b/IPL/include/opencv/opencv2/core/wimage.hpp deleted file mode 100644 index ef9d398..0000000 --- a/IPL/include/opencv/opencv2/core/wimage.hpp +++ /dev/null @@ -1,603 +0,0 @@ -/*M////////////////////////////////////////////////////////////////////////////// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to -// this license. If you do not agree to this license, do not download, -// install, copy or use the software. -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2008, Google, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of Intel Corporation or contributors may not be used to endorse -// or promote products derived from this software without specific -// prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" -// and any express or implied warranties, including, but not limited to, the -// implied warranties of merchantability and fitness for a particular purpose -// are disclaimed. In no event shall the Intel Corporation or contributors be -// liable for any direct, indirect, incidental, special, exemplary, or -// consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -///////////////////////////////////////////////////////////////////////////////// -//M*/ - -#ifndef __OPENCV_CORE_WIMAGE_HPP__ -#define __OPENCV_CORE_WIMAGE_HPP__ - -#include "opencv2/core/core_c.h" - -#ifdef __cplusplus - -namespace cv { - -//! @addtogroup core -//! @{ - -template class WImage; -template class WImageBuffer; -template class WImageView; - -template class WImageC; -template class WImageBufferC; -template class WImageViewC; - -// Commonly used typedefs. -typedef WImage WImage_b; -typedef WImageView WImageView_b; -typedef WImageBuffer WImageBuffer_b; - -typedef WImageC WImage1_b; -typedef WImageViewC WImageView1_b; -typedef WImageBufferC WImageBuffer1_b; - -typedef WImageC WImage3_b; -typedef WImageViewC WImageView3_b; -typedef WImageBufferC WImageBuffer3_b; - -typedef WImage WImage_f; -typedef WImageView WImageView_f; -typedef WImageBuffer WImageBuffer_f; - -typedef WImageC WImage1_f; -typedef WImageViewC WImageView1_f; -typedef WImageBufferC WImageBuffer1_f; - -typedef WImageC WImage3_f; -typedef WImageViewC WImageView3_f; -typedef WImageBufferC WImageBuffer3_f; - -// There isn't a standard for signed and unsigned short so be more -// explicit in the typename for these cases. -typedef WImage WImage_16s; -typedef WImageView WImageView_16s; -typedef WImageBuffer WImageBuffer_16s; - -typedef WImageC WImage1_16s; -typedef WImageViewC WImageView1_16s; -typedef WImageBufferC WImageBuffer1_16s; - -typedef WImageC WImage3_16s; -typedef WImageViewC WImageView3_16s; -typedef WImageBufferC WImageBuffer3_16s; - -typedef WImage WImage_16u; -typedef WImageView WImageView_16u; -typedef WImageBuffer WImageBuffer_16u; - -typedef WImageC WImage1_16u; -typedef WImageViewC WImageView1_16u; -typedef WImageBufferC WImageBuffer1_16u; - -typedef WImageC WImage3_16u; -typedef WImageViewC WImageView3_16u; -typedef WImageBufferC WImageBuffer3_16u; - -/** @brief Image class which provides a thin layer around an IplImage. - -The goals of the class design are: - - -# All the data has explicit ownership to avoid memory leaks - -# No hidden allocations or copies for performance. - -# Easy access to OpenCV methods (which will access IPP if available) - -# Can easily treat external data as an image - -# Easy to create images which are subsets of other images - -# Fast pixel access which can take advantage of number of channels if known at compile time. - -The WImage class is the image class which provides the data accessors. The 'W' comes from the fact -that it is also a wrapper around the popular but inconvenient IplImage class. A WImage can be -constructed either using a WImageBuffer class which allocates and frees the data, or using a -WImageView class which constructs a subimage or a view into external data. The view class does no -memory management. Each class actually has two versions, one when the number of channels is known -at compile time and one when it isn't. Using the one with the number of channels specified can -provide some compile time optimizations by using the fact that the number of channels is a -constant. - -We use the convention (c,r) to refer to column c and row r with (0,0) being the upper left corner. -This is similar to standard Euclidean coordinates with the first coordinate varying in the -horizontal direction and the second coordinate varying in the vertical direction. Thus (c,r) is -usually in the domain [0, width) X [0, height) - -Example usage: -@code -WImageBuffer3_b im(5,7); // Make a 5X7 3 channel image of type uchar -WImageView3_b sub_im(im, 2,2, 3,3); // 3X3 submatrix -vector vec(10, 3.0f); -WImageView1_f user_im(&vec[0], 2, 5); // 2X5 image w/ supplied data - -im.SetZero(); // same as cvSetZero(im.Ipl()) -*im(2, 3) = 15; // Modify the element at column 2, row 3 -MySetRand(&sub_im); - -// Copy the second row into the first. This can be done with no memory -// allocation and will use SSE if IPP is available. -int w = im.Width(); -im.View(0,0, w,1).CopyFrom(im.View(0,1, w,1)); - -// Doesn't care about source of data since using WImage -void MySetRand(WImage_b* im) { // Works with any number of channels -for (int r = 0; r < im->Height(); ++r) { - float* row = im->Row(r); - for (int c = 0; c < im->Width(); ++c) { - for (int ch = 0; ch < im->Channels(); ++ch, ++row) { - *row = uchar(rand() & 255); - } - } -} -} -@endcode - -Functions that are not part of the basic image allocation, viewing, and access should come from -OpenCV, except some useful functions that are not part of OpenCV can be found in wimage_util.h -*/ -template -class WImage -{ -public: - typedef T BaseType; - - // WImage is an abstract class with no other virtual methods so make the - // destructor virtual. - virtual ~WImage() = 0; - - // Accessors - IplImage* Ipl() {return image_; } - const IplImage* Ipl() const {return image_; } - T* ImageData() { return reinterpret_cast(image_->imageData); } - const T* ImageData() const { - return reinterpret_cast(image_->imageData); - } - - int Width() const {return image_->width; } - int Height() const {return image_->height; } - - // WidthStep is the number of bytes to go to the pixel with the next y coord - int WidthStep() const {return image_->widthStep; } - - int Channels() const {return image_->nChannels; } - int ChannelSize() const {return sizeof(T); } // number of bytes per channel - - // Number of bytes per pixel - int PixelSize() const {return Channels() * ChannelSize(); } - - // Return depth type (e.g. IPL_DEPTH_8U, IPL_DEPTH_32F) which is the number - // of bits per channel and with the signed bit set. - // This is known at compile time using specializations. - int Depth() const; - - inline const T* Row(int r) const { - return reinterpret_cast(image_->imageData + r*image_->widthStep); - } - - inline T* Row(int r) { - return reinterpret_cast(image_->imageData + r*image_->widthStep); - } - - // Pixel accessors which returns a pointer to the start of the channel - inline T* operator() (int c, int r) { - return reinterpret_cast(image_->imageData + r*image_->widthStep) + - c*Channels(); - } - - inline const T* operator() (int c, int r) const { - return reinterpret_cast(image_->imageData + r*image_->widthStep) + - c*Channels(); - } - - // Copy the contents from another image which is just a convenience to cvCopy - void CopyFrom(const WImage& src) { cvCopy(src.Ipl(), image_); } - - // Set contents to zero which is just a convenient to cvSetZero - void SetZero() { cvSetZero(image_); } - - // Construct a view into a region of this image - WImageView View(int c, int r, int width, int height); - -protected: - // Disallow copy and assignment - WImage(const WImage&); - void operator=(const WImage&); - - explicit WImage(IplImage* img) : image_(img) { - assert(!img || img->depth == Depth()); - } - - void SetIpl(IplImage* image) { - assert(!image || image->depth == Depth()); - image_ = image; - } - - IplImage* image_; -}; - - -/** Image class when both the pixel type and number of channels -are known at compile time. This wrapper will speed up some of the operations -like accessing individual pixels using the () operator. -*/ -template -class WImageC : public WImage -{ -public: - typedef typename WImage::BaseType BaseType; - enum { kChannels = C }; - - explicit WImageC(IplImage* img) : WImage(img) { - assert(!img || img->nChannels == Channels()); - } - - // Construct a view into a region of this image - WImageViewC View(int c, int r, int width, int height); - - // Copy the contents from another image which is just a convenience to cvCopy - void CopyFrom(const WImageC& src) { - cvCopy(src.Ipl(), WImage::image_); - } - - // WImageC is an abstract class with no other virtual methods so make the - // destructor virtual. - virtual ~WImageC() = 0; - - int Channels() const {return C; } - -protected: - // Disallow copy and assignment - WImageC(const WImageC&); - void operator=(const WImageC&); - - void SetIpl(IplImage* image) { - assert(!image || image->depth == WImage::Depth()); - WImage::SetIpl(image); - } -}; - -/** Image class which owns the data, so it can be allocated and is always -freed. It cannot be copied but can be explicity cloned. -*/ -template -class WImageBuffer : public WImage -{ -public: - typedef typename WImage::BaseType BaseType; - - // Default constructor which creates an object that can be - WImageBuffer() : WImage(0) {} - - WImageBuffer(int width, int height, int nchannels) : WImage(0) { - Allocate(width, height, nchannels); - } - - // Constructor which takes ownership of a given IplImage so releases - // the image on destruction. - explicit WImageBuffer(IplImage* img) : WImage(img) {} - - // Allocate an image. Does nothing if current size is the same as - // the new size. - void Allocate(int width, int height, int nchannels); - - // Set the data to point to an image, releasing the old data - void SetIpl(IplImage* img) { - ReleaseImage(); - WImage::SetIpl(img); - } - - // Clone an image which reallocates the image if of a different dimension. - void CloneFrom(const WImage& src) { - Allocate(src.Width(), src.Height(), src.Channels()); - CopyFrom(src); - } - - ~WImageBuffer() { - ReleaseImage(); - } - - // Release the image if it isn't null. - void ReleaseImage() { - if (WImage::image_) { - IplImage* image = WImage::image_; - cvReleaseImage(&image); - WImage::SetIpl(0); - } - } - - bool IsNull() const {return WImage::image_ == NULL; } - -private: - // Disallow copy and assignment - WImageBuffer(const WImageBuffer&); - void operator=(const WImageBuffer&); -}; - -/** Like a WImageBuffer class but when the number of channels is known at compile time. -*/ -template -class WImageBufferC : public WImageC -{ -public: - typedef typename WImage::BaseType BaseType; - enum { kChannels = C }; - - // Default constructor which creates an object that can be - WImageBufferC() : WImageC(0) {} - - WImageBufferC(int width, int height) : WImageC(0) { - Allocate(width, height); - } - - // Constructor which takes ownership of a given IplImage so releases - // the image on destruction. - explicit WImageBufferC(IplImage* img) : WImageC(img) {} - - // Allocate an image. Does nothing if current size is the same as - // the new size. - void Allocate(int width, int height); - - // Set the data to point to an image, releasing the old data - void SetIpl(IplImage* img) { - ReleaseImage(); - WImageC::SetIpl(img); - } - - // Clone an image which reallocates the image if of a different dimension. - void CloneFrom(const WImageC& src) { - Allocate(src.Width(), src.Height()); - CopyFrom(src); - } - - ~WImageBufferC() { - ReleaseImage(); - } - - // Release the image if it isn't null. - void ReleaseImage() { - if (WImage::image_) { - IplImage* image = WImage::image_; - cvReleaseImage(&image); - WImageC::SetIpl(0); - } - } - - bool IsNull() const {return WImage::image_ == NULL; } - -private: - // Disallow copy and assignment - WImageBufferC(const WImageBufferC&); - void operator=(const WImageBufferC&); -}; - -/** View into an image class which allows treating a subimage as an image or treating external data -as an image -*/ -template class WImageView : public WImage -{ -public: - typedef typename WImage::BaseType BaseType; - - // Construct a subimage. No checks are done that the subimage lies - // completely inside the original image. - WImageView(WImage* img, int c, int r, int width, int height); - - // Refer to external data. - // If not given width_step assumed to be same as width. - WImageView(T* data, int width, int height, int channels, int width_step = -1); - - // Refer to external data. This does NOT take ownership - // of the supplied IplImage. - WImageView(IplImage* img) : WImage(img) {} - - // Copy constructor - WImageView(const WImage& img) : WImage(0) { - header_ = *(img.Ipl()); - WImage::SetIpl(&header_); - } - - WImageView& operator=(const WImage& img) { - header_ = *(img.Ipl()); - WImage::SetIpl(&header_); - return *this; - } - -protected: - IplImage header_; -}; - - -template -class WImageViewC : public WImageC -{ -public: - typedef typename WImage::BaseType BaseType; - enum { kChannels = C }; - - // Default constructor needed for vectors of views. - WImageViewC(); - - virtual ~WImageViewC() {} - - // Construct a subimage. No checks are done that the subimage lies - // completely inside the original image. - WImageViewC(WImageC* img, - int c, int r, int width, int height); - - // Refer to external data - WImageViewC(T* data, int width, int height, int width_step = -1); - - // Refer to external data. This does NOT take ownership - // of the supplied IplImage. - WImageViewC(IplImage* img) : WImageC(img) {} - - // Copy constructor which does a shallow copy to allow multiple views - // of same data. gcc-4.1.1 gets confused if both versions of - // the constructor and assignment operator are not provided. - WImageViewC(const WImageC& img) : WImageC(0) { - header_ = *(img.Ipl()); - WImageC::SetIpl(&header_); - } - WImageViewC(const WImageViewC& img) : WImageC(0) { - header_ = *(img.Ipl()); - WImageC::SetIpl(&header_); - } - - WImageViewC& operator=(const WImageC& img) { - header_ = *(img.Ipl()); - WImageC::SetIpl(&header_); - return *this; - } - WImageViewC& operator=(const WImageViewC& img) { - header_ = *(img.Ipl()); - WImageC::SetIpl(&header_); - return *this; - } - -protected: - IplImage header_; -}; - - -// Specializations for depth -template<> -inline int WImage::Depth() const {return IPL_DEPTH_8U; } -template<> -inline int WImage::Depth() const {return IPL_DEPTH_8S; } -template<> -inline int WImage::Depth() const {return IPL_DEPTH_16S; } -template<> -inline int WImage::Depth() const {return IPL_DEPTH_16U; } -template<> -inline int WImage::Depth() const {return IPL_DEPTH_32S; } -template<> -inline int WImage::Depth() const {return IPL_DEPTH_32F; } -template<> -inline int WImage::Depth() const {return IPL_DEPTH_64F; } - -template inline WImage::~WImage() {} -template inline WImageC::~WImageC() {} - -template -inline void WImageBuffer::Allocate(int width, int height, int nchannels) -{ - if (IsNull() || WImage::Width() != width || - WImage::Height() != height || WImage::Channels() != nchannels) { - ReleaseImage(); - WImage::image_ = cvCreateImage(cvSize(width, height), - WImage::Depth(), nchannels); - } -} - -template -inline void WImageBufferC::Allocate(int width, int height) -{ - if (IsNull() || WImage::Width() != width || WImage::Height() != height) { - ReleaseImage(); - WImageC::SetIpl(cvCreateImage(cvSize(width, height),WImage::Depth(), C)); - } -} - -template -WImageView::WImageView(WImage* img, int c, int r, int width, int height) - : WImage(0) -{ - header_ = *(img->Ipl()); - header_.imageData = reinterpret_cast((*img)(c, r)); - header_.width = width; - header_.height = height; - WImage::SetIpl(&header_); -} - -template -WImageView::WImageView(T* data, int width, int height, int nchannels, int width_step) - : WImage(0) -{ - cvInitImageHeader(&header_, cvSize(width, height), WImage::Depth(), nchannels); - header_.imageData = reinterpret_cast(data); - if (width_step > 0) { - header_.widthStep = width_step; - } - WImage::SetIpl(&header_); -} - -template -WImageViewC::WImageViewC(WImageC* img, int c, int r, int width, int height) - : WImageC(0) -{ - header_ = *(img->Ipl()); - header_.imageData = reinterpret_cast((*img)(c, r)); - header_.width = width; - header_.height = height; - WImageC::SetIpl(&header_); -} - -template -WImageViewC::WImageViewC() : WImageC(0) { - cvInitImageHeader(&header_, cvSize(0, 0), WImage::Depth(), C); - header_.imageData = reinterpret_cast(0); - WImageC::SetIpl(&header_); -} - -template -WImageViewC::WImageViewC(T* data, int width, int height, int width_step) - : WImageC(0) -{ - cvInitImageHeader(&header_, cvSize(width, height), WImage::Depth(), C); - header_.imageData = reinterpret_cast(data); - if (width_step > 0) { - header_.widthStep = width_step; - } - WImageC::SetIpl(&header_); -} - -// Construct a view into a region of an image -template -WImageView WImage::View(int c, int r, int width, int height) { - return WImageView(this, c, r, width, height); -} - -template -WImageViewC WImageC::View(int c, int r, int width, int height) { - return WImageViewC(this, c, r, width, height); -} - -//! @} core - -} // end of namespace - -#endif // __cplusplus - -#endif diff --git a/IPL/include/opencv/opencv2/core_detect.hpp b/IPL/include/opencv/opencv2/core_detect.hpp new file mode 100644 index 0000000..46a5017 --- /dev/null +++ b/IPL/include/opencv/opencv2/core_detect.hpp @@ -0,0 +1,149 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +#ifndef _OPENCV_DNN_OBJDETECT_CORE_DETECT_HPP_ +#define _OPENCV_DNN_OBJDETECT_CORE_DETECT_HPP_ + +#include +#include + +#include +#include + +/** @defgroup dnn_objdetect DNN used for object detection +*/ + +namespace cv +{ +namespace dnn_objdetect +{ + + //! @addtogroup dnn_objdetect + //! @{ + + /** @brief Structure to hold the details pertaining to a single bounding box + */ + typedef struct + { + int xmin, xmax; + int ymin, ymax; + size_t class_idx; + std::string label_name; + double class_prob; + } object; + + + /** @brief A class to post process model predictions + */ + class CV_EXPORTS InferBbox + { + public: + /** @brief Default constructer + @param _delta_bbox Blob containing relative coordinates of bounding boxes + @param _class_scores Blob containing the probability values of each class + @param _conf_scores Blob containing the confidence scores + */ + InferBbox(Mat _delta_bbox, Mat _class_scores, Mat _conf_scores); + + /** @brief Filters the bounding boxes. + */ + void filter(double thresh = 0.8); + + /** @brief Vector which holds the final detections of the model + */ + std::vector detections; + + protected: + /** @brief Transform relative coordinates from ConvDet to bounding box coordinates + @param bboxes Vector to hold the predicted bounding boxes + */ + void transform_bboxes(std::vector > *bboxes); + + /** @brief Computes final probability values of each bounding box + @param final_probs Vector to hold the probability values + */ + void final_probability_dist(std::vector > *final_probs); + + /** @brief Transform bounding boxes from [x, y, h, w] to [xmin, ymin, xmax, ymax] + @param pre Vector conatining initial co-ordinates + @param post Vector containing the transformed co-ordinates + */ + void transform_bboxes_inv(std::vector > *pre, + std::vector > *post); + + /** @brief Ensures that the bounding box values are within image boundaries + @param min_max_boxes Vector containing bounding boxes of the form [xmin, ymin, xmax, ymax] + */ + void assert_predictions(std::vector > *min_max_boxes); + + /** @brief Filter top `n` predictions + @param probs Final probability values of bounding boxes + @param boxes Predicted bounding box co-ordinates + @param top_n_boxes Contains bounding box co-ordinates of top `n` boxes + @param top_n_idxs Containes class indices of top `n` bounding boxes + @param top_n_probs Contains probability values of top `n` bounding boxes + */ + void filter_top_n(std::vector > *probs, + std::vector > *boxes, + std::vector > &top_n_boxes, + std::vector &top_n_idxs, + std::vector &top_n_probs); + + /** @brief Wrapper to apply Non-Maximal Supression + @param top_n_boxes Contains bounding box co-ordinates of top `n` boxes + @param top_n_idxs Containes class indices of top `n` bounding boxes + @param top_n_probs Contains probability values of top `n` bounding boxes + */ + void nms_wrapper(std::vector > &top_n_boxes, + std::vector &top_n_idxs, + std::vector &top_n_probs); + + /** @brief Applies Non-Maximal Supression + @param boxes Bounding box co-ordinates belonging to one class + @param probs Probability values of boxes belonging to one class + */ + std::vector non_maximal_suppression(std::vector > + *boxes, std::vector *probs); + + /** @brief Computes intersection over union of bounding boxes + @param boxes Vector of bounding box co-ordinates + @param base_box Base box wrt which IOU is calculated + @param iou Vector to store IOU values + */ + void intersection_over_union(std::vector > *boxes, + std::vector *base_box, std::vector *iou); + + static inline bool comparator (std::pair l1, + std::pair l2) + { + return l1.first > l2.first; + } + + private: + Mat delta_bbox; + Mat class_scores; + Mat conf_scores; + + unsigned int image_width; + unsigned int image_height; + + unsigned int W, H; + std::vector > anchors_values; + std::vector > anchor_center; + std::vector > anchor_shapes; + + std::vector label_map; + + unsigned int num_classes; + unsigned int anchors_per_grid; + size_t anchors; + double intersection_thresh; + double nms_intersection_thresh; + size_t n_top_detections; + double epsilon; + }; + + //! @} +} // namespace dnn_objdetect +} // namespace cv +#endif diff --git a/IPL/include/opencv/opencv2/cvconfig.h b/IPL/include/opencv/opencv2/cvconfig.h index 1a6b85e..b6a8ca6 100644 --- a/IPL/include/opencv/opencv2/cvconfig.h +++ b/IPL/include/opencv/opencv2/cvconfig.h @@ -1,199 +1,168 @@ -/* OpenCV compiled as static or dynamic libs */ -#define BUILD_SHARED_LIBS - -/* Compile for 'real' NVIDIA GPU architectures */ -#define CUDA_ARCH_BIN "" - -/* Create PTX or BIN for 1.0 compute capability */ -/* #undef CUDA_ARCH_BIN_OR_PTX_10 */ - -/* NVIDIA GPU features are used */ -#define CUDA_ARCH_FEATURES "" - -/* Compile for 'virtual' NVIDIA PTX architectures */ -#define CUDA_ARCH_PTX "" - -/* AVFoundation video libraries */ -/* #undef HAVE_AVFOUNDATION */ - -/* V4L capturing support */ -/* #undef HAVE_CAMV4L */ - -/* V4L2 capturing support */ -/* #undef HAVE_CAMV4L2 */ - -/* Carbon windowing environment */ -/* #undef HAVE_CARBON */ - -/* AMD's Basic Linear Algebra Subprograms Library*/ -/* #undef HAVE_CLAMDBLAS */ - -/* AMD's OpenCL Fast Fourier Transform Library*/ -/* #undef HAVE_CLAMDFFT */ - -/* Clp support */ -/* #undef HAVE_CLP */ - -/* Cocoa API */ -/* #undef HAVE_COCOA */ - -/* C= */ -/* #undef HAVE_CSTRIPES */ - -/* NVidia Cuda Basic Linear Algebra Subprograms (BLAS) API*/ -/* #undef HAVE_CUBLAS */ - -/* NVidia Cuda Runtime API*/ -/* #undef HAVE_CUDA */ - -/* NVidia Cuda Fast Fourier Transform (FFT) API*/ -/* #undef HAVE_CUFFT */ - -/* IEEE1394 capturing support */ -/* #undef HAVE_DC1394 */ - -/* IEEE1394 capturing support - libdc1394 v2.x */ -/* #undef HAVE_DC1394_2 */ - -/* DirectX */ -#define HAVE_DIRECTX -#define HAVE_DIRECTX_NV12 -#define HAVE_D3D11 -#define HAVE_D3D10 -#define HAVE_D3D9 - -/* DirectShow Video Capture library */ -#define HAVE_DSHOW - -/* Eigen Matrix & Linear Algebra Library */ -/* #undef HAVE_EIGEN */ - -/* FFMpeg video library */ -#define HAVE_FFMPEG - -/* ffmpeg's libswscale */ -#define HAVE_FFMPEG_SWSCALE - -/* ffmpeg in Gentoo */ -#define HAVE_GENTOO_FFMPEG - -/* Geospatial Data Abstraction Library */ -/* #undef HAVE_GDAL */ - -/* GStreamer multimedia framework */ -/* #undef HAVE_GSTREAMER */ - -/* GTK+ 2.0 Thread support */ -/* #undef HAVE_GTHREAD */ - -/* GTK+ 2.x toolkit */ -/* #undef HAVE_GTK */ - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_INTTYPES_H */ - -/* Intel Perceptual Computing SDK library */ -/* #undef HAVE_INTELPERC */ - -/* Intel Integrated Performance Primitives */ -#define HAVE_IPP -#define HAVE_IPP_ICV_ONLY - -/* Intel IPP Async */ -/* #undef HAVE_IPP_A */ - -/* JPEG-2000 codec */ -#define HAVE_JASPER - -/* IJG JPEG codec */ -#define HAVE_JPEG - -/* libpng/png.h needs to be included */ -/* #undef HAVE_LIBPNG_PNG_H */ - -/* V4L/V4L2 capturing support via libv4l */ -/* #undef HAVE_LIBV4L */ - -/* Microsoft Media Foundation Capture library */ -/* #undef HAVE_MSMF */ - -/* NVidia Video Decoding API*/ -/* #undef HAVE_NVCUVID */ - -/* NVidia Video Encoding API*/ -/* #undef HAVE_NVCUVENC */ - -/* OpenCL Support */ -#define HAVE_OPENCL -/* #undef HAVE_OPENCL_STATIC */ -/* #undef HAVE_OPENCL_SVM */ - -/* OpenEXR codec */ -#define HAVE_OPENEXR - -/* OpenGL support*/ -/* #undef HAVE_OPENGL */ - -/* OpenNI library */ -/* #undef HAVE_OPENNI */ - -/* OpenNI library */ -/* #undef HAVE_OPENNI2 */ - -/* PNG codec */ -#define HAVE_PNG - -/* Posix threads (pthreads) */ -/* #undef HAVE_PTHREADS */ - -/* parallel_for with pthreads */ -/* #undef HAVE_PTHREADS_PF */ - -/* Qt support */ -/* #undef HAVE_QT */ - -/* Qt OpenGL support */ -/* #undef HAVE_QT_OPENGL */ - -/* QuickTime video libraries */ -/* #undef HAVE_QUICKTIME */ - -/* QTKit video libraries */ -/* #undef HAVE_QTKIT */ - -/* Intel Threading Building Blocks */ -/* #undef HAVE_TBB */ - -/* TIFF codec */ -#define HAVE_TIFF - -/* Unicap video capture library */ -/* #undef HAVE_UNICAP */ - -/* Video for Windows support */ -#define HAVE_VFW - -/* V4L2 capturing support in videoio.h */ -/* #undef HAVE_VIDEOIO */ - -/* Win32 UI */ -#define HAVE_WIN32UI - -/* XIMEA camera support */ -/* #undef HAVE_XIMEA */ - -/* Xine video library */ -/* #undef HAVE_XINE */ - -/* Define if your processor stores words with the most significant byte - first (like Motorola and SPARC, unlike Intel and VAX). */ -/* #undef WORDS_BIGENDIAN */ - -/* gPhoto2 library */ -/* #undef HAVE_GPHOTO2 */ - -/* VA library (libva) */ -/* #undef HAVE_VA */ - -/* Intel VA-API/OpenCL */ -/* #undef HAVE_VA_INTEL */ +#ifndef OPENCV_CVCONFIG_H_INCLUDED +#define OPENCV_CVCONFIG_H_INCLUDED + +/* OpenCV compiled as static or dynamic libs */ +#define BUILD_SHARED_LIBS + +/* OpenCV intrinsics optimized code */ +#define CV_ENABLE_INTRINSICS + +/* OpenCV additional optimized code */ +/* #undef CV_DISABLE_OPTIMIZATION */ + +/* Compile for 'real' NVIDIA GPU architectures */ +#define CUDA_ARCH_BIN "" + +/* Create PTX or BIN for 1.0 compute capability */ +/* #undef CUDA_ARCH_BIN_OR_PTX_10 */ + +/* NVIDIA GPU features are used */ +#define CUDA_ARCH_FEATURES "" + +/* Compile for 'virtual' NVIDIA PTX architectures */ +#define CUDA_ARCH_PTX "" + +/* AMD's Basic Linear Algebra Subprograms Library*/ +/* #undef HAVE_CLAMDBLAS */ + +/* AMD's OpenCL Fast Fourier Transform Library*/ +/* #undef HAVE_CLAMDFFT */ + +/* Clp support */ +/* #undef HAVE_CLP */ + +/* Cocoa API */ +/* #undef HAVE_COCOA */ + +/* NVIDIA CUDA Runtime API*/ +/* #undef HAVE_CUDA */ + +/* NVIDIA CUDA Basic Linear Algebra Subprograms (BLAS) API*/ +/* #undef HAVE_CUBLAS */ + +/* NVIDIA CUDA Deep Neural Network (cuDNN) API*/ +/* #undef HAVE_CUDNN */ + +/* NVIDIA CUDA Fast Fourier Transform (FFT) API*/ +/* #undef HAVE_CUFFT */ + +/* DirectX */ +#define HAVE_DIRECTX +#define HAVE_DIRECTX_NV12 +#define HAVE_D3D11 +#define HAVE_D3D10 +#define HAVE_D3D9 + +/* Eigen Matrix & Linear Algebra Library */ +/* #undef HAVE_EIGEN */ + +/* Geospatial Data Abstraction Library */ +/* #undef HAVE_GDAL */ + +/* GTK+ 2.0 Thread support */ +/* #undef HAVE_GTHREAD */ + +/* GTK+ 2.x toolkit */ +/* #undef HAVE_GTK */ + +/* Halide support */ +/* #undef HAVE_HALIDE */ + +/* Vulkan support */ +/* #undef HAVE_VULKAN */ + +/* Define to 1 if you have the header file. */ +#define HAVE_INTTYPES_H 1 + +/* Intel Integrated Performance Primitives */ +#define HAVE_IPP +#define HAVE_IPP_ICV +#define HAVE_IPP_IW +#define HAVE_IPP_IW_LL + +/* JPEG-2000 codec */ +/* #undef HAVE_OPENJPEG */ +#define HAVE_JASPER + +/* IJG JPEG codec */ +#define HAVE_JPEG + +/* libpng/png.h needs to be included */ +/* #undef HAVE_LIBPNG_PNG_H */ + +/* GDCM DICOM codec */ +/* #undef HAVE_GDCM */ + +/* NVIDIA Video Decoding API*/ +/* #undef HAVE_NVCUVID */ + +/* NVIDIA Video Encoding API*/ +/* #undef HAVE_NVCUVENC */ + +/* OpenCL Support */ +#define HAVE_OPENCL +/* #undef HAVE_OPENCL_STATIC */ +/* #undef HAVE_OPENCL_SVM */ + +/* NVIDIA OpenCL D3D Extensions support */ +#define HAVE_OPENCL_D3D11_NV + +/* OpenEXR codec */ +#define HAVE_OPENEXR + +/* OpenGL support*/ +/* #undef HAVE_OPENGL */ + +/* PNG codec */ +#define HAVE_PNG + +/* Posix threads (pthreads) */ +/* #undef HAVE_PTHREAD */ + +/* parallel_for with pthreads */ +/* #undef HAVE_PTHREADS_PF */ + +/* Qt support */ +/* #undef HAVE_QT */ + +/* Qt OpenGL support */ +/* #undef HAVE_QT_OPENGL */ + +/* Intel Threading Building Blocks */ +/* #undef HAVE_TBB */ + +/* Ste||ar Group High Performance ParallelX */ +/* #undef HAVE_HPX */ + +/* TIFF codec */ +#define HAVE_TIFF + +/* Win32 UI */ +#define HAVE_WIN32UI + +/* Define if your processor stores words with the most significant byte + first (like Motorola and SPARC, unlike Intel and VAX). */ +/* #undef WORDS_BIGENDIAN */ + +/* VA library (libva) */ +/* #undef HAVE_VA */ + +/* Intel VA-API/OpenCL */ +/* #undef HAVE_VA_INTEL */ + +/* Lapack */ +/* #undef HAVE_LAPACK */ + +/* Library was compiled with functions instrumentation */ +/* #undef ENABLE_INSTRUMENTATION */ + +/* OpenVX */ +/* #undef HAVE_OPENVX */ + +/* OpenCV trace utilities */ +#define OPENCV_TRACE + +/* Library QR-code decoding */ +#define HAVE_QUIRC + +#endif // OPENCV_CVCONFIG_H_INCLUDED diff --git a/IPL/include/opencv/opencv2/datasets/ar_hmdb.hpp b/IPL/include/opencv/opencv2/datasets/ar_hmdb.hpp index 8941583..fa257ad 100644 --- a/IPL/include/opencv/opencv2/datasets/ar_hmdb.hpp +++ b/IPL/include/opencv/opencv2/datasets/ar_hmdb.hpp @@ -67,7 +67,7 @@ struct AR_hmdbObj : public Object class CV_EXPORTS AR_hmdb : public Dataset { public: - virtual void load(const std::string &path) = 0; + virtual void load(const std::string &path) CV_OVERRIDE = 0; static Ptr create(); }; diff --git a/IPL/include/opencv/opencv2/datasets/ar_sports.hpp b/IPL/include/opencv/opencv2/datasets/ar_sports.hpp index 7f51405..cb24e38 100644 --- a/IPL/include/opencv/opencv2/datasets/ar_sports.hpp +++ b/IPL/include/opencv/opencv2/datasets/ar_sports.hpp @@ -66,7 +66,7 @@ struct AR_sportsObj : public Object class CV_EXPORTS AR_sports : public Dataset { public: - virtual void load(const std::string &path) = 0; + virtual void load(const std::string &path) CV_OVERRIDE = 0; static Ptr create(); }; diff --git a/IPL/include/opencv/opencv2/datasets/dataset.hpp b/IPL/include/opencv/opencv2/datasets/dataset.hpp index ccf2b66..7b05c10 100644 --- a/IPL/include/opencv/opencv2/datasets/dataset.hpp +++ b/IPL/include/opencv/opencv2/datasets/dataset.hpp @@ -435,6 +435,53 @@ Implements loading dataset: ./opencv/build/bin/example_datasets_slam_tumindoor -p=/home/user/path_to_unpacked_folders/ ~~~ +@defgroup datasets_sr Super Resolution + +### The Berkeley Segmentation Dataset and Benchmark + +Implements loading dataset: + +"The Berkeley Segmentation Dataset and Benchmark": + +Usage: +-# From link above download `BSDS300-images.tgz`. +-# Unpack. +-# To load data run: +~~~ +./opencv/build/bin/example_datasets_sr_bsds -p=/home/user/path_to_unpacked_folder/ +~~~ + +### DIV2K dataset: DIVerse 2K + +Implements loading dataset: + +"DIV2K dataset: DIVerse 2K": + +Usage: +-# From link above download 'Train data (HR images)' or any other of the dataset files. +-# Unpack. +-# To load data run: +~~~ +./opencv/build/bin/example_datasets_sr_div2k -p=/home/user/path_to_unpacked_folder/folder_containing_the_images/ +~~~ + +### The General-100 Dataset + +Implements loading dataset: + +"General-100 dataset contains 100 bmp-format images (with no compression). +We used this dataset in our FSRCNN ECCV 2016 paper. The size of these 100 images ranges from 710 x 704 (large) to 131 x 112 (small). +They are all of good quality with clear edges but fewer smooth regions (e.g., sky and ocean), thus are very suitable for the super-resolution training.": + + +Usage: +-# From link above download `General-100.zip`. +-# Unpack. +-# To load data run: +~~~ +./opencv/build/bin/example_datasets_sr_general100 -p=/home/user/path_to_unpacked_folder/ +~~~ + @defgroup datasets_tr Text Recognition ### The Chars74K Dataset @@ -485,7 +532,7 @@ Implements loading dataset: "VOT 2015 dataset comprises 60 short sequences showing various objects in challenging backgrounds. The sequences were chosen from a large pool of sequences including the ALOV dataset, OTB2 dataset, -non-tracking datasets, Computer Vision Online, Professor Bob Fisher’s Image Database, Videezy, +non-tracking datasets, Computer Vision Online, Professor Bob Fisher's Image Database, Videezy, Center for Research in Computer Vision, University of Central Florida, USA, NYU Center for Genomics and Systems Biology, Data Wrangling, Open Access Directory and Learning and Recognition in Vision Group, INRIA, France. The VOT sequence selection protocol was applied to obtain a representative diff --git a/IPL/include/opencv/opencv2/datasets/fr_adience.hpp b/IPL/include/opencv/opencv2/datasets/fr_adience.hpp index c84bce1..2df5d8d 100644 --- a/IPL/include/opencv/opencv2/datasets/fr_adience.hpp +++ b/IPL/include/opencv/opencv2/datasets/fr_adience.hpp @@ -83,7 +83,7 @@ struct FR_adienceObj : public Object class CV_EXPORTS FR_adience : public Dataset { public: - virtual void load(const std::string &path) = 0; + virtual void load(const std::string &path) CV_OVERRIDE = 0; static Ptr create(); diff --git a/IPL/include/opencv/opencv2/datasets/fr_lfw.hpp b/IPL/include/opencv/opencv2/datasets/fr_lfw.hpp index 7065da7..16d6315 100644 --- a/IPL/include/opencv/opencv2/datasets/fr_lfw.hpp +++ b/IPL/include/opencv/opencv2/datasets/fr_lfw.hpp @@ -66,7 +66,7 @@ struct FR_lfwObj : public Object class CV_EXPORTS FR_lfw : public Dataset { public: - virtual void load(const std::string &path) = 0; + virtual void load(const std::string &path) CV_OVERRIDE = 0; static Ptr create(); }; diff --git a/IPL/include/opencv/opencv2/datasets/gr_chalearn.hpp b/IPL/include/opencv/opencv2/datasets/gr_chalearn.hpp index a8eaa6c..6a5c764 100644 --- a/IPL/include/opencv/opencv2/datasets/gr_chalearn.hpp +++ b/IPL/include/opencv/opencv2/datasets/gr_chalearn.hpp @@ -83,7 +83,7 @@ struct GR_chalearnObj : public Object class CV_EXPORTS GR_chalearn : public Dataset { public: - virtual void load(const std::string &path) = 0; + virtual void load(const std::string &path) CV_OVERRIDE = 0; static Ptr create(); }; diff --git a/IPL/include/opencv/opencv2/datasets/gr_skig.hpp b/IPL/include/opencv/opencv2/datasets/gr_skig.hpp index 9c86224..35bc6f4 100644 --- a/IPL/include/opencv/opencv2/datasets/gr_skig.hpp +++ b/IPL/include/opencv/opencv2/datasets/gr_skig.hpp @@ -105,7 +105,7 @@ struct GR_skigObj : public Object class CV_EXPORTS GR_skig : public Dataset { public: - virtual void load(const std::string &path) = 0; + virtual void load(const std::string &path) CV_OVERRIDE = 0; static Ptr create(); }; diff --git a/IPL/include/opencv/opencv2/datasets/hpe_humaneva.hpp b/IPL/include/opencv/opencv2/datasets/hpe_humaneva.hpp index 5366e0d..2eba8cf 100644 --- a/IPL/include/opencv/opencv2/datasets/hpe_humaneva.hpp +++ b/IPL/include/opencv/opencv2/datasets/hpe_humaneva.hpp @@ -77,7 +77,7 @@ enum datasetType class CV_EXPORTS HPE_humaneva : public Dataset { public: - virtual void load(const std::string &path) = 0; + virtual void load(const std::string &path) CV_OVERRIDE = 0; static Ptr create(int num=humaneva_1); }; diff --git a/IPL/include/opencv/opencv2/datasets/hpe_parse.hpp b/IPL/include/opencv/opencv2/datasets/hpe_parse.hpp index 7629e2c..42294df 100644 --- a/IPL/include/opencv/opencv2/datasets/hpe_parse.hpp +++ b/IPL/include/opencv/opencv2/datasets/hpe_parse.hpp @@ -65,7 +65,7 @@ struct HPE_parseObj : public Object class CV_EXPORTS HPE_parse : public Dataset { public: - virtual void load(const std::string &path) = 0; + virtual void load(const std::string &path) CV_OVERRIDE = 0; static Ptr create(); }; diff --git a/IPL/include/opencv/opencv2/datasets/ir_affine.hpp b/IPL/include/opencv/opencv2/datasets/ir_affine.hpp index 3b04a4b..7cc0c09 100644 --- a/IPL/include/opencv/opencv2/datasets/ir_affine.hpp +++ b/IPL/include/opencv/opencv2/datasets/ir_affine.hpp @@ -67,7 +67,7 @@ struct IR_affineObj : public Object class CV_EXPORTS IR_affine : public Dataset { public: - virtual void load(const std::string &path) = 0; + virtual void load(const std::string &path) CV_OVERRIDE = 0; static Ptr create(); }; diff --git a/IPL/include/opencv/opencv2/datasets/ir_robot.hpp b/IPL/include/opencv/opencv2/datasets/ir_robot.hpp index 0acfe0a..446e3be 100644 --- a/IPL/include/opencv/opencv2/datasets/ir_robot.hpp +++ b/IPL/include/opencv/opencv2/datasets/ir_robot.hpp @@ -76,7 +76,7 @@ struct IR_robotObj : public Object class CV_EXPORTS IR_robot : public Dataset { public: - virtual void load(const std::string &path) = 0; + virtual void load(const std::string &path) CV_OVERRIDE = 0; static Ptr create(); }; diff --git a/IPL/include/opencv/opencv2/datasets/is_bsds.hpp b/IPL/include/opencv/opencv2/datasets/is_bsds.hpp index 7357a67..b4a3e65 100644 --- a/IPL/include/opencv/opencv2/datasets/is_bsds.hpp +++ b/IPL/include/opencv/opencv2/datasets/is_bsds.hpp @@ -65,7 +65,7 @@ struct IS_bsdsObj : public Object class CV_EXPORTS IS_bsds : public Dataset { public: - virtual void load(const std::string &path) = 0; + virtual void load(const std::string &path) CV_OVERRIDE = 0; static Ptr create(); }; diff --git a/IPL/include/opencv/opencv2/datasets/is_weizmann.hpp b/IPL/include/opencv/opencv2/datasets/is_weizmann.hpp index 5daa420..4f61578 100644 --- a/IPL/include/opencv/opencv2/datasets/is_weizmann.hpp +++ b/IPL/include/opencv/opencv2/datasets/is_weizmann.hpp @@ -68,7 +68,7 @@ struct IS_weizmannObj : public Object class CV_EXPORTS IS_weizmann : public Dataset { public: - virtual void load(const std::string &path) = 0; + virtual void load(const std::string &path) CV_OVERRIDE = 0; static Ptr create(); }; diff --git a/IPL/include/opencv/opencv2/datasets/msm_epfl.hpp b/IPL/include/opencv/opencv2/datasets/msm_epfl.hpp index a08fc4b..54aebde 100644 --- a/IPL/include/opencv/opencv2/datasets/msm_epfl.hpp +++ b/IPL/include/opencv/opencv2/datasets/msm_epfl.hpp @@ -77,7 +77,7 @@ struct MSM_epflObj : public Object class CV_EXPORTS MSM_epfl : public Dataset { public: - virtual void load(const std::string &path) = 0; + virtual void load(const std::string &path) CV_OVERRIDE = 0; static Ptr create(); }; diff --git a/IPL/include/opencv/opencv2/datasets/msm_middlebury.hpp b/IPL/include/opencv/opencv2/datasets/msm_middlebury.hpp index 2fd67bf..9b883e5 100644 --- a/IPL/include/opencv/opencv2/datasets/msm_middlebury.hpp +++ b/IPL/include/opencv/opencv2/datasets/msm_middlebury.hpp @@ -68,7 +68,7 @@ struct MSM_middleburyObj : public Object class CV_EXPORTS MSM_middlebury : public Dataset { public: - virtual void load(const std::string &path) = 0; + virtual void load(const std::string &path) CV_OVERRIDE = 0; static Ptr create(); }; diff --git a/IPL/include/opencv/opencv2/datasets/or_imagenet.hpp b/IPL/include/opencv/opencv2/datasets/or_imagenet.hpp index 26a8f63..079165b 100644 --- a/IPL/include/opencv/opencv2/datasets/or_imagenet.hpp +++ b/IPL/include/opencv/opencv2/datasets/or_imagenet.hpp @@ -66,7 +66,7 @@ struct OR_imagenetObj : public Object class CV_EXPORTS OR_imagenet : public Dataset { public: - virtual void load(const std::string &path) = 0; + virtual void load(const std::string &path) CV_OVERRIDE = 0; static Ptr create(); }; diff --git a/IPL/include/opencv/opencv2/datasets/or_mnist.hpp b/IPL/include/opencv/opencv2/datasets/or_mnist.hpp index ff6bd60..ac4d838 100644 --- a/IPL/include/opencv/opencv2/datasets/or_mnist.hpp +++ b/IPL/include/opencv/opencv2/datasets/or_mnist.hpp @@ -66,7 +66,7 @@ struct OR_mnistObj : public Object class CV_EXPORTS OR_mnist : public Dataset { public: - virtual void load(const std::string &path) = 0; + virtual void load(const std::string &path) CV_OVERRIDE = 0; static Ptr create(); }; diff --git a/IPL/include/opencv/opencv2/datasets/or_pascal.hpp b/IPL/include/opencv/opencv2/datasets/or_pascal.hpp index bca8e62..c5f27af 100644 --- a/IPL/include/opencv/opencv2/datasets/or_pascal.hpp +++ b/IPL/include/opencv/opencv2/datasets/or_pascal.hpp @@ -89,7 +89,7 @@ struct OR_pascalObj : public Object class CV_EXPORTS OR_pascal : public Dataset { public: - virtual void load(const std::string &path) = 0; + virtual void load(const std::string &path) CV_OVERRIDE = 0; static Ptr create(); }; diff --git a/IPL/include/opencv/opencv2/datasets/or_sun.hpp b/IPL/include/opencv/opencv2/datasets/or_sun.hpp index 059c0d4..7d3d887 100644 --- a/IPL/include/opencv/opencv2/datasets/or_sun.hpp +++ b/IPL/include/opencv/opencv2/datasets/or_sun.hpp @@ -66,7 +66,7 @@ struct OR_sunObj : public Object class CV_EXPORTS OR_sun : public Dataset { public: - virtual void load(const std::string &path) = 0; + virtual void load(const std::string &path) CV_OVERRIDE = 0; static Ptr create(); diff --git a/IPL/include/opencv/opencv2/datasets/pd_caltech.hpp b/IPL/include/opencv/opencv2/datasets/pd_caltech.hpp index 9ff7278..c8545e2 100644 --- a/IPL/include/opencv/opencv2/datasets/pd_caltech.hpp +++ b/IPL/include/opencv/opencv2/datasets/pd_caltech.hpp @@ -76,7 +76,7 @@ struct PD_caltechObj : public Object class CV_EXPORTS PD_caltech : public Dataset { public: - virtual void load(const std::string &path) = 0; + virtual void load(const std::string &path) CV_OVERRIDE = 0; static Ptr create(); }; diff --git a/IPL/include/opencv/opencv2/datasets/pd_inria.hpp b/IPL/include/opencv/opencv2/datasets/pd_inria.hpp index 7586578..9e1d30b 100644 --- a/IPL/include/opencv/opencv2/datasets/pd_inria.hpp +++ b/IPL/include/opencv/opencv2/datasets/pd_inria.hpp @@ -57,7 +57,7 @@ namespace datasets //! @addtogroup datasets_pd //! @{ -enum sampleType +enum sampleType { POS = 0, NEG = 1 @@ -67,7 +67,7 @@ struct PD_inriaObj : public Object { // image file name std::string filename; - + // positive or negative sampleType sType; @@ -83,7 +83,7 @@ struct PD_inriaObj : public Object class CV_EXPORTS PD_inria : public Dataset { public: - virtual void load(const std::string &path) = 0; + virtual void load(const std::string &path) CV_OVERRIDE = 0; static Ptr create(); }; diff --git a/IPL/include/opencv/opencv2/datasets/slam_kitti.hpp b/IPL/include/opencv/opencv2/datasets/slam_kitti.hpp index 1b7c408..a004b95 100644 --- a/IPL/include/opencv/opencv2/datasets/slam_kitti.hpp +++ b/IPL/include/opencv/opencv2/datasets/slam_kitti.hpp @@ -74,7 +74,7 @@ struct SLAM_kittiObj : public Object class CV_EXPORTS SLAM_kitti : public Dataset { public: - virtual void load(const std::string &path) = 0; + virtual void load(const std::string &path) CV_OVERRIDE = 0; static Ptr create(); }; diff --git a/IPL/include/opencv/opencv2/datasets/slam_tumindoor.hpp b/IPL/include/opencv/opencv2/datasets/slam_tumindoor.hpp index 758dd13..eca07ad 100644 --- a/IPL/include/opencv/opencv2/datasets/slam_tumindoor.hpp +++ b/IPL/include/opencv/opencv2/datasets/slam_tumindoor.hpp @@ -74,7 +74,7 @@ struct SLAM_tumindoorObj : public Object class CV_EXPORTS SLAM_tumindoor : public Dataset { public: - virtual void load(const std::string &path) = 0; + virtual void load(const std::string &path) CV_OVERRIDE = 0; static Ptr create(); }; diff --git a/IPL/include/opencv/opencv2/datasets/sr_bsds.hpp b/IPL/include/opencv/opencv2/datasets/sr_bsds.hpp new file mode 100644 index 0000000..c319323 --- /dev/null +++ b/IPL/include/opencv/opencv2/datasets/sr_bsds.hpp @@ -0,0 +1,41 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_DATASETS_SR_BSDS_HPP +#define OPENCV_DATASETS_SR_BSDS_HPP + +#include +#include + +#include "opencv2/datasets/dataset.hpp" + +#include + +namespace cv +{ +namespace datasets +{ + +//! @addtogroup datasets_sr +//! @{ + +struct SR_bsdsObj : public Object +{ + std::string imageName; +}; + +class CV_EXPORTS SR_bsds : public Dataset +{ +public: + virtual void load(const std::string &path) CV_OVERRIDE = 0; + + static Ptr create(); +}; + +//! @} + +} +} + +#endif \ No newline at end of file diff --git a/IPL/include/opencv/opencv2/datasets/sr_div2k.hpp b/IPL/include/opencv/opencv2/datasets/sr_div2k.hpp new file mode 100644 index 0000000..9e2c025 --- /dev/null +++ b/IPL/include/opencv/opencv2/datasets/sr_div2k.hpp @@ -0,0 +1,41 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_DATASETS_SR_DIV2K_HPP +#define OPENCV_DATASETS_SR_DIV2K_HPP + +#include +#include + +#include "opencv2/datasets/dataset.hpp" + +#include + +namespace cv +{ +namespace datasets +{ + +//! @addtogroup datasets_sr +//! @{ + +struct SR_div2kObj : public Object +{ + std::string imageName; +}; + +class CV_EXPORTS SR_div2k : public Dataset +{ +public: + virtual void load(const std::string &path) CV_OVERRIDE = 0; + + static Ptr create(); +}; + +//! @} + +} +} + +#endif \ No newline at end of file diff --git a/IPL/include/opencv/opencv2/datasets/sr_general100.hpp b/IPL/include/opencv/opencv2/datasets/sr_general100.hpp new file mode 100644 index 0000000..8b2d189 --- /dev/null +++ b/IPL/include/opencv/opencv2/datasets/sr_general100.hpp @@ -0,0 +1,41 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_DATASETS_SR_GENERAL100_HPP +#define OPENCV_DATASETS_SR_GENERAL100_HPP + +#include +#include + +#include "opencv2/datasets/dataset.hpp" + +#include + +namespace cv +{ +namespace datasets +{ + +//! @addtogroup datasets_sr +//! @{ + +struct SR_general100Obj : public Object +{ + std::string imageName; +}; + +class CV_EXPORTS SR_general100 : public Dataset +{ +public: + virtual void load(const std::string &path) CV_OVERRIDE = 0; + + static Ptr create(); +}; + +//! @} + +} +} + +#endif \ No newline at end of file diff --git a/IPL/include/opencv/opencv2/datasets/tr_chars.hpp b/IPL/include/opencv/opencv2/datasets/tr_chars.hpp index c213bff..69aab83 100644 --- a/IPL/include/opencv/opencv2/datasets/tr_chars.hpp +++ b/IPL/include/opencv/opencv2/datasets/tr_chars.hpp @@ -66,7 +66,7 @@ struct TR_charsObj : public Object class CV_EXPORTS TR_chars : public Dataset { public: - virtual void load(const std::string &path) = 0; + virtual void load(const std::string &path) CV_OVERRIDE = 0; static Ptr create(); }; diff --git a/IPL/include/opencv/opencv2/datasets/tr_icdar.hpp b/IPL/include/opencv/opencv2/datasets/tr_icdar.hpp index abfd7db..3b43d69 100644 --- a/IPL/include/opencv/opencv2/datasets/tr_icdar.hpp +++ b/IPL/include/opencv/opencv2/datasets/tr_icdar.hpp @@ -74,7 +74,7 @@ struct TR_icdarObj : public Object class CV_EXPORTS TR_icdar : public Dataset { public: - virtual void load(const std::string &path) = 0; + virtual void load(const std::string &path) CV_OVERRIDE = 0; static Ptr create(); }; diff --git a/IPL/include/opencv/opencv2/datasets/tr_svt.hpp b/IPL/include/opencv/opencv2/datasets/tr_svt.hpp index 6c2d533..7b94180 100644 --- a/IPL/include/opencv/opencv2/datasets/tr_svt.hpp +++ b/IPL/include/opencv/opencv2/datasets/tr_svt.hpp @@ -73,7 +73,7 @@ struct TR_svtObj : public Object class CV_EXPORTS TR_svt : public Dataset { public: - virtual void load(const std::string &path) = 0; + virtual void load(const std::string &path) CV_OVERRIDE = 0; static Ptr create(); }; diff --git a/IPL/include/opencv/opencv/cv.h b/IPL/include/opencv/opencv2/datasets/track_alov.hpp similarity index 53% rename from IPL/include/opencv/opencv/cv.h rename to IPL/include/opencv/opencv2/datasets/track_alov.hpp index 0aefc6d..a3c5da0 100644 --- a/IPL/include/opencv/opencv/cv.h +++ b/IPL/include/opencv/opencv2/datasets/track_alov.hpp @@ -10,8 +10,7 @@ // License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2014, Itseez Inc, all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, @@ -30,7 +29,7 @@ // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, +// In no event shall the Itseez Inc or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused @@ -40,34 +39,69 @@ // //M*/ -#ifndef __OPENCV_OLD_CV_H__ -#define __OPENCV_OLD_CV_H__ - -#if defined(_MSC_VER) - #define CV_DO_PRAGMA(x) __pragma(x) - #define __CVSTR2__(x) #x - #define __CVSTR1__(x) __CVSTR2__(x) - #define __CVMSVCLOC__ __FILE__ "("__CVSTR1__(__LINE__)") : " - #define CV_MSG_PRAGMA(_msg) CV_DO_PRAGMA(message (__CVMSVCLOC__ _msg)) -#elif defined(__GNUC__) - #define CV_DO_PRAGMA(x) _Pragma (#x) - #define CV_MSG_PRAGMA(_msg) CV_DO_PRAGMA(message (_msg)) -#else - #define CV_DO_PRAGMA(x) - #define CV_MSG_PRAGMA(_msg) -#endif -#define CV_WARNING(x) CV_MSG_PRAGMA("Warning: " #x) +#ifndef OPENCV_DATASETS_TRACK_ALOV_HPP +#define OPENCV_DATASETS_TRACK_ALOV_HPP + +#include +#include + +#include "opencv2/datasets/dataset.hpp" +#include "opencv2/datasets/util.hpp" + +using namespace std; + +namespace cv +{ +namespace datasets +{ + +//! @addtogroup datasets_track +//! @{ + +struct TRACK_alovObj : public Object +{ + int id; + std::string imagePath; + vector gtbb; +}; + +const string sectionNames[] = { "01-Light", "02-SurfaceCover", "03-Specularity", "04-Transparency", "05-Shape", "06-MotionSmoothness", "07-MotionCoherence", +"08-Clutter", "09-Confusion", "10-LowContrast", "11-Occlusion", "12-MovingCamera", "13-ZoomingCamera", "14-LongDuration" }; + +const int sectionSizes[] = { 33, 15, 18, 20, 24, 22, 12, 15, 37, 23, 34, 22, 29, 10 }; -//CV_WARNING("This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module") +class CV_EXPORTS TRACK_alov : public Dataset +{ +public: + static Ptr create(); -#include "opencv2/core/core_c.h" -#include "opencv2/imgproc/imgproc_c.h" -#include "opencv2/photo/photo_c.h" -#include "opencv2/video/tracking_c.h" -#include "opencv2/objdetect/objdetect_c.h" + virtual void load(const std::string &path) CV_OVERRIDE = 0; -#if !defined(CV_IMPL) -#define CV_IMPL extern "C" -#endif //CV_IMPL + //Load only frames with annotations (~every 5-th frame) + virtual void loadAnnotatedOnly(const std::string &path) = 0; -#endif // __OPENCV_OLD_CV_H_ + virtual int getDatasetsNum() = 0; + + virtual int getDatasetLength(int id) = 0; + + virtual bool initDataset(int id) = 0; + + virtual bool getNextFrame(Mat &frame) = 0; + virtual vector getNextGT() = 0; + + //Get frame/GT by datasetID (1..N) frameID (1..K) + virtual bool getFrame(Mat &frame, int datasetID, int frameID) = 0; + virtual vector getGT(int datasetID, int frameID) = 0; + +protected: + vector > > data; + int activeDatasetID; + int frameCounter; +}; + +//! @} + +} +} + +#endif diff --git a/IPL/include/opencv/opencv2/datasets/track_vot.hpp b/IPL/include/opencv/opencv2/datasets/track_vot.hpp index 6249f02..a5a9c6f 100644 --- a/IPL/include/opencv/opencv2/datasets/track_vot.hpp +++ b/IPL/include/opencv/opencv2/datasets/track_vot.hpp @@ -70,7 +70,7 @@ class CV_EXPORTS TRACK_vot : public Dataset public: static Ptr create(); - virtual void load(const std::string &path) = 0; + virtual void load(const std::string &path) CV_OVERRIDE = 0; virtual int getDatasetsNum() = 0; diff --git a/IPL/include/opencv/opencv2/dnn.hpp b/IPL/include/opencv/opencv2/dnn.hpp index 37be989..97f2fe3 100644 --- a/IPL/include/opencv/opencv2/dnn.hpp +++ b/IPL/include/opencv/opencv2/dnn.hpp @@ -39,12 +39,12 @@ // //M*/ -#ifndef __OPENCV_DNN_HPP__ -#define __OPENCV_DNN_HPP__ +#ifndef OPENCV_DNN_HPP +#define OPENCV_DNN_HPP -// This is an umbrealla header to include into you project. +// This is an umbrella header to include into you project. // We are free to change headers layout in dnn subfolder, so please include -// this header for future compartibility +// this header for future compatibility /** @defgroup dnn Deep Neural Network module @@ -52,13 +52,27 @@ This module contains: - API for new layers creation, layers are building bricks of neural networks; - set of built-in most-useful Layers; - - API to constuct and modify comprehensive neural networks from layers; - - functionality for loading serialized networks models from differnet frameworks. + - API to construct and modify comprehensive neural networks from layers; + - functionality for loading serialized networks models from different frameworks. - Functionality of this module is designed only for forward pass computations (i. e. network testing). + Functionality of this module is designed only for forward pass computations (i.e. network testing). A network training is in principle not supported. @} */ +/** @example samples/dnn/classification.cpp +Check @ref tutorial_dnn_googlenet "the corresponding tutorial" for more details +*/ +/** @example samples/dnn/colorization.cpp +*/ +/** @example samples/dnn/object_detection.cpp +Check @ref tutorial_dnn_yolo "the corresponding tutorial" for more details +*/ +/** @example samples/dnn/openpose.cpp +*/ +/** @example samples/dnn/segmentation.cpp +*/ +/** @example samples/dnn/text_detection.cpp +*/ #include -#endif /* __OPENCV_DNN_HPP__ */ +#endif /* OPENCV_DNN_HPP */ diff --git a/IPL/include/opencv/opencv2/dnn/all_layers.hpp b/IPL/include/opencv/opencv2/dnn/all_layers.hpp new file mode 100644 index 0000000..0969ba6 --- /dev/null +++ b/IPL/include/opencv/opencv2/dnn/all_layers.hpp @@ -0,0 +1,656 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_DNN_DNN_ALL_LAYERS_HPP +#define OPENCV_DNN_DNN_ALL_LAYERS_HPP +#include + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN +//! @addtogroup dnn +//! @{ + +/** @defgroup dnnLayerList Partial List of Implemented Layers + @{ + This subsection of dnn module contains information about built-in layers and their descriptions. + + Classes listed here, in fact, provides C++ API for creating instances of built-in layers. + In addition to this way of layers instantiation, there is a more common factory API (see @ref dnnLayerFactory), it allows to create layers dynamically (by name) and register new ones. + You can use both API, but factory API is less convenient for native C++ programming and basically designed for use inside importers (see @ref readNetFromCaffe(), @ref readNetFromTorch(), @ref readNetFromTensorflow()). + + Built-in layers partially reproduce functionality of corresponding Caffe and Torch7 layers. + In particular, the following layers and Caffe importer were tested to reproduce Caffe functionality: + - Convolution + - Deconvolution + - Pooling + - InnerProduct + - TanH, ReLU, Sigmoid, BNLL, Power, AbsVal + - Softmax + - Reshape, Flatten, Slice, Split + - LRN + - MVN + - Dropout (since it does nothing on forward pass -)) +*/ + + class CV_EXPORTS BlankLayer : public Layer + { + public: + static Ptr create(const LayerParams ¶ms); + }; + + /** + * Constant layer produces the same data blob at an every forward pass. + */ + class CV_EXPORTS ConstLayer : public Layer + { + public: + static Ptr create(const LayerParams ¶ms); + }; + + //! LSTM recurrent layer + class CV_EXPORTS LSTMLayer : public Layer + { + public: + /** Creates instance of LSTM layer */ + static Ptr create(const LayerParams& params); + + /** @deprecated Use LayerParams::blobs instead. + @brief Set trained weights for LSTM layer. + + LSTM behavior on each step is defined by current input, previous output, previous cell state and learned weights. + + Let @f$x_t@f$ be current input, @f$h_t@f$ be current output, @f$c_t@f$ be current state. + Than current output and current cell state is computed as follows: + @f{eqnarray*}{ + h_t &= o_t \odot tanh(c_t), \\ + c_t &= f_t \odot c_{t-1} + i_t \odot g_t, \\ + @f} + where @f$\odot@f$ is per-element multiply operation and @f$i_t, f_t, o_t, g_t@f$ is internal gates that are computed using learned weights. + + Gates are computed as follows: + @f{eqnarray*}{ + i_t &= sigmoid&(W_{xi} x_t + W_{hi} h_{t-1} + b_i), \\ + f_t &= sigmoid&(W_{xf} x_t + W_{hf} h_{t-1} + b_f), \\ + o_t &= sigmoid&(W_{xo} x_t + W_{ho} h_{t-1} + b_o), \\ + g_t &= tanh &(W_{xg} x_t + W_{hg} h_{t-1} + b_g), \\ + @f} + where @f$W_{x?}@f$, @f$W_{h?}@f$ and @f$b_{?}@f$ are learned weights represented as matrices: + @f$W_{x?} \in R^{N_h \times N_x}@f$, @f$W_{h?} \in R^{N_h \times N_h}@f$, @f$b_? \in R^{N_h}@f$. + + For simplicity and performance purposes we use @f$ W_x = [W_{xi}; W_{xf}; W_{xo}, W_{xg}] @f$ + (i.e. @f$W_x@f$ is vertical concatenation of @f$ W_{x?} @f$), @f$ W_x \in R^{4N_h \times N_x} @f$. + The same for @f$ W_h = [W_{hi}; W_{hf}; W_{ho}, W_{hg}], W_h \in R^{4N_h \times N_h} @f$ + and for @f$ b = [b_i; b_f, b_o, b_g]@f$, @f$b \in R^{4N_h} @f$. + + @param Wh is matrix defining how previous output is transformed to internal gates (i.e. according to above mentioned notation is @f$ W_h @f$) + @param Wx is matrix defining how current input is transformed to internal gates (i.e. according to above mentioned notation is @f$ W_x @f$) + @param b is bias vector (i.e. according to above mentioned notation is @f$ b @f$) + */ + CV_DEPRECATED virtual void setWeights(const Mat &Wh, const Mat &Wx, const Mat &b) = 0; + + /** @brief Specifies shape of output blob which will be [[`T`], `N`] + @p outTailShape. + * @details If this parameter is empty or unset then @p outTailShape = [`Wh`.size(0)] will be used, + * where `Wh` is parameter from setWeights(). + */ + virtual void setOutShape(const MatShape &outTailShape = MatShape()) = 0; + + /** @deprecated Use flag `produce_cell_output` in LayerParams. + * @brief Specifies either interpret first dimension of input blob as timestamp dimension either as sample. + * + * If flag is set to true then shape of input blob will be interpreted as [`T`, `N`, `[data dims]`] where `T` specifies number of timestamps, `N` is number of independent streams. + * In this case each forward() call will iterate through `T` timestamps and update layer's state `T` times. + * + * If flag is set to false then shape of input blob will be interpreted as [`N`, `[data dims]`]. + * In this case each forward() call will make one iteration and produce one timestamp with shape [`N`, `[out dims]`]. + */ + CV_DEPRECATED virtual void setUseTimstampsDim(bool use = true) = 0; + + /** @deprecated Use flag `use_timestamp_dim` in LayerParams. + * @brief If this flag is set to true then layer will produce @f$ c_t @f$ as second output. + * @details Shape of the second output is the same as first output. + */ + CV_DEPRECATED virtual void setProduceCellOutput(bool produce = false) = 0; + + /* In common case it use single input with @f$x_t@f$ values to compute output(s) @f$h_t@f$ (and @f$c_t@f$). + * @param input should contain packed values @f$x_t@f$ + * @param output contains computed outputs: @f$h_t@f$ (and @f$c_t@f$ if setProduceCellOutput() flag was set to true). + * + * If setUseTimstampsDim() is set to true then @p input[0] should has at least two dimensions with the following shape: [`T`, `N`, `[data dims]`], + * where `T` specifies number of timestamps, `N` is number of independent streams (i.e. @f$ x_{t_0 + t}^{stream} @f$ is stored inside @p input[0][t, stream, ...]). + * + * If setUseTimstampsDim() is set to false then @p input[0] should contain single timestamp, its shape should has form [`N`, `[data dims]`] with at least one dimension. + * (i.e. @f$ x_{t}^{stream} @f$ is stored inside @p input[0][stream, ...]). + */ + + int inputNameToIndex(String inputName) CV_OVERRIDE; + int outputNameToIndex(const String& outputName) CV_OVERRIDE; + }; + + /** @brief Classical recurrent layer + + Accepts two inputs @f$x_t@f$ and @f$h_{t-1}@f$ and compute two outputs @f$o_t@f$ and @f$h_t@f$. + + - input: should contain packed input @f$x_t@f$. + - output: should contain output @f$o_t@f$ (and @f$h_t@f$ if setProduceHiddenOutput() is set to true). + + input[0] should have shape [`T`, `N`, `data_dims`] where `T` and `N` is number of timestamps and number of independent samples of @f$x_t@f$ respectively. + + output[0] will have shape [`T`, `N`, @f$N_o@f$], where @f$N_o@f$ is number of rows in @f$ W_{xo} @f$ matrix. + + If setProduceHiddenOutput() is set to true then @p output[1] will contain a Mat with shape [`T`, `N`, @f$N_h@f$], where @f$N_h@f$ is number of rows in @f$ W_{hh} @f$ matrix. + */ + class CV_EXPORTS RNNLayer : public Layer + { + public: + /** Creates instance of RNNLayer */ + static Ptr create(const LayerParams& params); + + /** Setups learned weights. + + Recurrent-layer behavior on each step is defined by current input @f$ x_t @f$, previous state @f$ h_t @f$ and learned weights as follows: + @f{eqnarray*}{ + h_t &= tanh&(W_{hh} h_{t-1} + W_{xh} x_t + b_h), \\ + o_t &= tanh&(W_{ho} h_t + b_o), + @f} + + @param Wxh is @f$ W_{xh} @f$ matrix + @param bh is @f$ b_{h} @f$ vector + @param Whh is @f$ W_{hh} @f$ matrix + @param Who is @f$ W_{xo} @f$ matrix + @param bo is @f$ b_{o} @f$ vector + */ + virtual void setWeights(const Mat &Wxh, const Mat &bh, const Mat &Whh, const Mat &Who, const Mat &bo) = 0; + + /** @brief If this flag is set to true then layer will produce @f$ h_t @f$ as second output. + * @details Shape of the second output is the same as first output. + */ + virtual void setProduceHiddenOutput(bool produce = false) = 0; + + }; + + class CV_EXPORTS BaseConvolutionLayer : public Layer + { + public: + CV_DEPRECATED_EXTERNAL Size kernel, stride, pad, dilation, adjustPad; + std::vector adjust_pads; + std::vector kernel_size, strides, dilations; + std::vector pads_begin, pads_end; + String padMode; + int numOutput; + }; + + class CV_EXPORTS ConvolutionLayer : public BaseConvolutionLayer + { + public: + static Ptr create(const LayerParams& params); + }; + + class CV_EXPORTS DeconvolutionLayer : public BaseConvolutionLayer + { + public: + static Ptr create(const LayerParams& params); + }; + + class CV_EXPORTS LRNLayer : public Layer + { + public: + int type; + + int size; + float alpha, beta, bias; + bool normBySize; + + static Ptr create(const LayerParams& params); + }; + + class CV_EXPORTS PoolingLayer : public Layer + { + public: + int type; + std::vector kernel_size, strides; + std::vector pads_begin, pads_end; + CV_DEPRECATED_EXTERNAL Size kernel, stride, pad; + CV_DEPRECATED_EXTERNAL int pad_l, pad_t, pad_r, pad_b; + bool globalPooling; //!< Flag is true if at least one of the axes is global pooled. + std::vector isGlobalPooling; + bool computeMaxIdx; + String padMode; + bool ceilMode; + // If true for average pooling with padding, divide an every output region + // by a whole kernel area. Otherwise exclude zero padded values and divide + // by number of real values. + bool avePoolPaddedArea; + // ROIPooling parameters. + Size pooledSize; + float spatialScale; + // PSROIPooling parameters. + int psRoiOutChannels; + + static Ptr create(const LayerParams& params); + }; + + class CV_EXPORTS SoftmaxLayer : public Layer + { + public: + bool logSoftMax; + + static Ptr create(const LayerParams& params); + }; + + class CV_EXPORTS InnerProductLayer : public Layer + { + public: + int axis; + static Ptr create(const LayerParams& params); + }; + + class CV_EXPORTS MVNLayer : public Layer + { + public: + float eps; + bool normVariance, acrossChannels; + + static Ptr create(const LayerParams& params); + }; + + /* Reshaping */ + + class CV_EXPORTS ReshapeLayer : public Layer + { + public: + MatShape newShapeDesc; + Range newShapeRange; + + static Ptr create(const LayerParams& params); + }; + + class CV_EXPORTS FlattenLayer : public Layer + { + public: + static Ptr create(const LayerParams ¶ms); + }; + + class CV_EXPORTS ConcatLayer : public Layer + { + public: + int axis; + /** + * @brief Add zero padding in case of concatenation of blobs with different + * spatial sizes. + * + * Details: https://github.com/torch/nn/blob/master/doc/containers.md#depthconcat + */ + bool padding; + + static Ptr create(const LayerParams ¶ms); + }; + + class CV_EXPORTS SplitLayer : public Layer + { + public: + int outputsCount; //!< Number of copies that will be produced (is ignored when negative). + + static Ptr create(const LayerParams ¶ms); + }; + + /** + * Slice layer has several modes: + * 1. Caffe mode + * @param[in] axis Axis of split operation + * @param[in] slice_point Array of split points + * + * Number of output blobs equals to number of split points plus one. The + * first blob is a slice on input from 0 to @p slice_point[0] - 1 by @p axis, + * the second output blob is a slice of input from @p slice_point[0] to + * @p slice_point[1] - 1 by @p axis and the last output blob is a slice of + * input from @p slice_point[-1] up to the end of @p axis size. + * + * 2. TensorFlow mode + * @param begin Vector of start indices + * @param size Vector of sizes + * + * More convenient numpy-like slice. One and only output blob + * is a slice `input[begin[0]:begin[0]+size[0], begin[1]:begin[1]+size[1], ...]` + * + * 3. Torch mode + * @param axis Axis of split operation + * + * Split input blob on the equal parts by @p axis. + */ + class CV_EXPORTS SliceLayer : public Layer + { + public: + /** + * @brief Vector of slice ranges. + * + * The first dimension equals number of output blobs. + * Inner vector has slice ranges for the first number of input dimensions. + */ + std::vector > sliceRanges; + int axis; + int num_split; + + static Ptr create(const LayerParams ¶ms); + }; + + class CV_EXPORTS PermuteLayer : public Layer + { + public: + static Ptr create(const LayerParams& params); + }; + + /** + * Permute channels of 4-dimensional input blob. + * @param group Number of groups to split input channels and pick in turns + * into output blob. + * + * \f[ groupSize = \frac{number\ of\ channels}{group} \f] + * \f[ output(n, c, h, w) = input(n, groupSize \times (c \% group) + \lfloor \frac{c}{group} \rfloor, h, w) \f] + * Read more at https://arxiv.org/pdf/1707.01083.pdf + */ + class CV_EXPORTS ShuffleChannelLayer : public Layer + { + public: + static Ptr create(const LayerParams& params); + + int group; + }; + + /** + * @brief Adds extra values for specific axes. + * @param paddings Vector of paddings in format + * @code + * [ pad_before, pad_after, // [0]th dimension + * pad_before, pad_after, // [1]st dimension + * ... + * pad_before, pad_after ] // [n]th dimension + * @endcode + * that represents number of padded values at every dimension + * starting from the first one. The rest of dimensions won't + * be padded. + * @param value Value to be padded. Defaults to zero. + * @param type Padding type: 'constant', 'reflect' + * @param input_dims Torch's parameter. If @p input_dims is not equal to the + * actual input dimensionality then the `[0]th` dimension + * is considered as a batch dimension and @p paddings are shifted + * to a one dimension. Defaults to `-1` that means padding + * corresponding to @p paddings. + */ + class CV_EXPORTS PaddingLayer : public Layer + { + public: + static Ptr create(const LayerParams& params); + }; + + /* Activations */ + class CV_EXPORTS ActivationLayer : public Layer + { + public: + virtual void forwardSlice(const float* src, float* dst, int len, + size_t outPlaneSize, int cn0, int cn1) const = 0; + }; + + class CV_EXPORTS ReLULayer : public ActivationLayer + { + public: + float negativeSlope; + + static Ptr create(const LayerParams ¶ms); + }; + + class CV_EXPORTS ReLU6Layer : public ActivationLayer + { + public: + float minValue, maxValue; + + static Ptr create(const LayerParams ¶ms); + }; + + class CV_EXPORTS ChannelsPReLULayer : public ActivationLayer + { + public: + static Ptr create(const LayerParams& params); + }; + + class CV_EXPORTS ELULayer : public ActivationLayer + { + public: + static Ptr create(const LayerParams ¶ms); + }; + + class CV_EXPORTS TanHLayer : public ActivationLayer + { + public: + static Ptr create(const LayerParams ¶ms); + }; + + class CV_EXPORTS SwishLayer : public ActivationLayer + { + public: + static Ptr create(const LayerParams ¶ms); + }; + + class CV_EXPORTS MishLayer : public ActivationLayer + { + public: + static Ptr create(const LayerParams ¶ms); + }; + + class CV_EXPORTS SigmoidLayer : public ActivationLayer + { + public: + static Ptr create(const LayerParams ¶ms); + }; + + class CV_EXPORTS BNLLLayer : public ActivationLayer + { + public: + static Ptr create(const LayerParams ¶ms); + }; + + class CV_EXPORTS AbsLayer : public ActivationLayer + { + public: + static Ptr create(const LayerParams ¶ms); + }; + + class CV_EXPORTS PowerLayer : public ActivationLayer + { + public: + float power, scale, shift; + + static Ptr create(const LayerParams ¶ms); + }; + + /* Layers used in semantic segmentation */ + + class CV_EXPORTS CropLayer : public Layer + { + public: + static Ptr create(const LayerParams ¶ms); + }; + + /** @brief Element wise operation on inputs + + Extra optional parameters: + - "operation" as string. Values are "sum" (default), "prod", "max", "div" + - "coeff" as float array. Specify weights of inputs for SUM operation + - "output_channels_mode" as string. Values are "same" (default, all input must have the same layout), "input_0", "input_0_truncate", "max_input_channels" + */ + class CV_EXPORTS EltwiseLayer : public Layer + { + public: + static Ptr create(const LayerParams ¶ms); + }; + + class CV_EXPORTS BatchNormLayer : public ActivationLayer + { + public: + bool hasWeights, hasBias; + float epsilon; + + static Ptr create(const LayerParams ¶ms); + }; + + class CV_EXPORTS MaxUnpoolLayer : public Layer + { + public: + Size poolKernel; + Size poolPad; + Size poolStride; + + static Ptr create(const LayerParams ¶ms); + }; + + class CV_EXPORTS ScaleLayer : public Layer + { + public: + bool hasBias; + int axis; + + static Ptr create(const LayerParams& params); + }; + + class CV_EXPORTS ShiftLayer : public Layer + { + public: + static Ptr create(const LayerParams& params); + }; + + class CV_EXPORTS PriorBoxLayer : public Layer + { + public: + static Ptr create(const LayerParams& params); + }; + + class CV_EXPORTS ReorgLayer : public Layer + { + public: + static Ptr create(const LayerParams& params); + }; + + class CV_EXPORTS RegionLayer : public Layer + { + public: + static Ptr create(const LayerParams& params); + }; + + class CV_EXPORTS DetectionOutputLayer : public Layer + { + public: + static Ptr create(const LayerParams& params); + }; + + /** + * @brief \f$ L_p \f$ - normalization layer. + * @param p Normalization factor. The most common `p = 1` for \f$ L_1 \f$ - + * normalization or `p = 2` for \f$ L_2 \f$ - normalization or a custom one. + * @param eps Parameter \f$ \epsilon \f$ to prevent a division by zero. + * @param across_spatial If true, normalize an input across all non-batch dimensions. + * Otherwise normalize an every channel separately. + * + * Across spatial: + * @f[ + * norm = \sqrt[p]{\epsilon + \sum_{x, y, c} |src(x, y, c)|^p } \\ + * dst(x, y, c) = \frac{ src(x, y, c) }{norm} + * @f] + * + * Channel wise normalization: + * @f[ + * norm(c) = \sqrt[p]{\epsilon + \sum_{x, y} |src(x, y, c)|^p } \\ + * dst(x, y, c) = \frac{ src(x, y, c) }{norm(c)} + * @f] + * + * Where `x, y` - spatial coordinates, `c` - channel. + * + * An every sample in the batch is normalized separately. Optionally, + * output is scaled by the trained parameters. + */ + class CV_EXPORTS NormalizeBBoxLayer : public Layer + { + public: + float pnorm, epsilon; + CV_DEPRECATED_EXTERNAL bool acrossSpatial; + + static Ptr create(const LayerParams& params); + }; + + /** + * @brief Resize input 4-dimensional blob by nearest neighbor or bilinear strategy. + * + * Layer is used to support TensorFlow's resize_nearest_neighbor and resize_bilinear ops. + */ + class CV_EXPORTS ResizeLayer : public Layer + { + public: + static Ptr create(const LayerParams& params); + }; + + /** + * @brief Bilinear resize layer from https://github.com/cdmh/deeplab-public-ver2 + * + * It differs from @ref ResizeLayer in output shape and resize scales computations. + */ + class CV_EXPORTS InterpLayer : public Layer + { + public: + static Ptr create(const LayerParams& params); + }; + + class CV_EXPORTS ProposalLayer : public Layer + { + public: + static Ptr create(const LayerParams& params); + }; + + class CV_EXPORTS CropAndResizeLayer : public Layer + { + public: + static Ptr create(const LayerParams& params); + }; + +//! @} +//! @} +CV__DNN_INLINE_NS_END +} +} +#endif diff --git a/IPL/include/opencv/opencv2/dnn/blob.hpp b/IPL/include/opencv/opencv2/dnn/blob.hpp deleted file mode 100644 index bc582c8..0000000 --- a/IPL/include/opencv/opencv2/dnn/blob.hpp +++ /dev/null @@ -1,238 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_DNN_DNN_BLOB_HPP__ -#define __OPENCV_DNN_DNN_BLOB_HPP__ -#include -#include -#include - -namespace cv -{ -namespace dnn -{ -//! @addtogroup dnn -//! @{ - - /** @brief Lightweight class for storing and processing a shape of blob (or anything else). */ - struct BlobShape - { - explicit BlobShape(int ndims = 4, int fill = 1); //!< Creates n-dim shape and fill its by @p fill - BlobShape(int num, int cn, int rows, int cols); //!< Creates 4-dim shape [@p num, @p cn, @p rows, @p cols] - BlobShape(int ndims, const int *sizes); //!< Creates n-dim shape from the @p sizes array - BlobShape(const std::vector &sizes); //!< Creates n-dim shape from the @p sizes vector - template - BlobShape(const Vec &shape); //!< Creates n-dim shape from @ref cv::Vec - - /** @brief Returns number of dimensions. */ - int dims() const; - - /** @brief Returns reference to the size of the specified @p axis. - * - * Negative @p axis is supported, in this case a counting starts from the last axis, - * i. e. -1 corresponds to last axis. - * If non-existing axis was passed then an error will be generated. - */ - int &size(int axis); - - /** @brief Returns the size of the specified @p axis. - * @see size() - */ - int size(int axis) const; - - int operator[](int axis) const; //!< Does the same thing as size(axis). - int &operator[](int axis); //!< Does the same thing as size(int) const. - - /** @brief Returns the size of the specified @p axis. - * - * Does the same thing as size(int) const, but if non-existing axis will be passed then 1 will be returned, - * therefore this function always finishes successfully. - */ - int xsize(int axis) const; - - /** @brief Returns the product of all sizes of axes. */ - ptrdiff_t total(); - - /** @brief Returns pointer to the first element of continuous size array. */ - const int *ptr() const; - - /** @brief Checks equality of two shapes. */ - bool equal(const BlobShape &other) const; - - bool operator== (const BlobShape &r) const; - - private: - cv::AutoBuffer sz; - }; - - - /** @brief This class provides methods for continuous n-dimensional CPU and GPU array processing. - * - * The class is realized as a wrapper over @ref cv::Mat and @ref cv::UMat. - * It will support methods for switching and logical synchronization between CPU and GPU. - */ - class CV_EXPORTS Blob - { - public: - explicit Blob(); - - /** @brief Constructs blob with specified @p shape and @p type. */ - explicit Blob(const BlobShape &shape, int type = CV_32F); - - /** @brief Constucts 4-dimensional blob (so-called batch) from image or array of images. - * @param image 2-dimensional multi-channel or 3-dimensional single-channel image (or array of images) - * @param dstCn specify size of second axis of ouptut blob - */ - explicit Blob(InputArray image, int dstCn = -1); - - /** @brief Creates blob with specified @p shape and @p type. */ - void create(const BlobShape &shape, int type = CV_32F); - - /** @brief Creates blob from cv::Mat or cv::UMat without copying the data */ - void fill(InputArray in); - /** @brief Creates blob from user data. - * @details If @p deepCopy is false then CPU data will not be allocated. - */ - void fill(const BlobShape &shape, int type, void *data, bool deepCopy = true); - - Mat& matRef(); //!< Returns reference to cv::Mat, containing blob data. - const Mat& matRefConst() const; //!< Returns reference to cv::Mat, containing blob data, for read-only purposes. - UMat &umatRef(); //!< Returns reference to cv::UMat, containing blob data (not implemented yet). - const UMat &umatRefConst() const; //!< Returns reference to cv::UMat, containing blob data, for read-only purposes (not implemented yet). - - /** @brief Returns number of blob dimensions. */ - int dims() const; - - /** @brief Returns the size of the specified @p axis. - * - * Negative @p axis is supported, in this case a counting starts from the last axis, - * i. e. -1 corresponds to last axis. - * If non-existing axis was passed then an error will be generated. - */ - int size(int axis) const; - - /** @brief Returns the size of the specified @p axis. - * - * Does the same thing as size(int) const, but if non-existing axis will be passed then 1 will be returned, - * therefore this function always finishes successfully. - */ - int xsize(int axis) const; - - /** @brief Computes the product of sizes of axes among the specified axes range [@p startAxis; @p endAxis). - * @param startAxis the first axis to include in the range. - * @param endAxis the first axis to exclude from the range. - * @details Negative axis indexing can be used. - */ - size_t total(int startAxis = 0, int endAxis = INT_MAX) const; - - /** @brief Converts @p axis index to canonical format (where 0 <= axis < dims()). */ - int canonicalAxis(int axis) const; - - /** @brief Returns shape of the blob. */ - BlobShape shape() const; - - /** @brief Checks equality of two blobs shapes. */ - bool equalShape(const Blob &other) const; - - /** @brief Returns slice of first two dimensions. - * @details The behaviour is similar to the following numpy code: blob[n, cn, ...] - */ - Mat getPlane(int n, int cn); - - /* Shape getters of 4-dimensional blobs. */ - int cols() const; //!< Returns size of the fourth axis blob. - int rows() const; //!< Returns size of the thrid axis blob. - int channels() const; //!< Returns size of the second axis blob. - int num() const; //!< Returns size of the first axis blob. - Size size2() const; //!< Returns cv::Size(cols(), rows()) - Vec4i shape4() const; //!< Returns shape of first four blob axes. - - /** @brief Returns linear index of the element with specified coordinates in the blob. - * - * If @p n < dims() then unspecified coordinates will be filled by zeros. - * If @p n > dims() then extra coordinates will be ignored. - */ - template - size_t offset(const Vec &pos) const; - /** @overload */ - size_t offset(int n = 0, int cn = 0, int row = 0, int col = 0) const; - - /* CPU pointer getters */ - /** @brief Returns pointer to the blob element with the specified position, stored in CPU memory. - * - * @p n correspond to the first axis, @p cn - to the second, etc. - * If dims() > 4 then unspecified coordinates will be filled by zeros. - * If dims() < 4 then extra coordinates will be ignored. - */ - uchar *ptr(int n = 0, int cn = 0, int row = 0, int col = 0); - /** @overload */ - template - TFloat *ptr(int n = 0, int cn = 0, int row = 0, int col = 0); - /** @overload ptr() */ - float *ptrf(int n = 0, int cn = 0, int row = 0, int col = 0); - //TODO: add const ptr methods - - /** @brief Shares data from other @p blob. - * @returns *this - */ - Blob &shareFrom(const Blob &blob); - - /** @brief Changes shape of the blob without copying the data. - * @returns *this - */ - Blob &reshape(const BlobShape &shape); - - /** @brief Returns type of the blob. */ - int type() const; - - private: - const int *sizes() const; - - Mat m; - }; - -//! @} -} -} - -#include "blob.inl.hpp" - -#endif diff --git a/IPL/include/opencv/opencv2/dnn/blob.inl.hpp b/IPL/include/opencv/opencv2/dnn/blob.inl.hpp deleted file mode 100644 index 4a6de48..0000000 --- a/IPL/include/opencv/opencv2/dnn/blob.inl.hpp +++ /dev/null @@ -1,342 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_DNN_DNN_BLOB_INL_HPP__ -#define __OPENCV_DNN_DNN_BLOB_INL_HPP__ -#include "blob.hpp" - -namespace cv -{ -namespace dnn -{ - -inline BlobShape::BlobShape(int ndims, int fill) : sz( (size_t)std::max(ndims, 0) ) -{ - CV_Assert(ndims >= 0); - for (int i = 0; i < ndims; i++) - sz[i] = fill; -} - -inline BlobShape::BlobShape(int ndims, const int *sizes) : sz( (size_t)std::max(ndims, 0) ) -{ - CV_Assert(ndims >= 0); - for (int i = 0; i < ndims; i++) - sz[i] = sizes[i]; -} - -inline BlobShape::BlobShape(int num, int cn, int rows, int cols) : sz(4) -{ - sz[0] = num; - sz[1] = cn; - sz[2] = rows; - sz[3] = cols; -} - -inline BlobShape::BlobShape(const std::vector &sizes) : sz( sizes.size() ) -{ - for (int i = 0; i < (int)sizes.size(); i++) - sz[i] = sizes[i]; -} - -template -inline BlobShape::BlobShape(const Vec &shape) : sz(n) -{ - for (int i = 0; i < n; i++) - sz[i] = shape[i]; -} - -inline int BlobShape::dims() const -{ - return (int)sz.size(); -} - -inline int BlobShape::xsize(int axis) const -{ - if (axis < -dims() || axis >= dims()) - return 1; - - return sz[(axis < 0) ? axis + dims() : axis]; -} - -inline int BlobShape::size(int axis) const -{ - CV_Assert(-dims() <= axis && axis < dims()); - return sz[(axis < 0) ? axis + dims() : axis]; -} - -inline int &BlobShape::size(int axis) -{ - CV_Assert(-dims() <= axis && axis < dims()); - return sz[(axis < 0) ? axis + dims() : axis]; -} - -inline int BlobShape::operator[] (int axis) const -{ - CV_Assert(-dims() <= axis && axis < dims()); - return sz[(axis < 0) ? axis + dims() : axis]; -} - -inline int &BlobShape::operator[] (int axis) -{ - CV_Assert(-dims() <= axis && axis < dims()); - return sz[(axis < 0) ? axis + dims() : axis]; -} - -inline ptrdiff_t BlobShape::total() -{ - if (dims() == 0) - return 0; - - ptrdiff_t res = 1; - for (int i = 0; i < dims(); i++) - res *= sz[i]; - return res; -} - -inline const int *BlobShape::ptr() const -{ - return sz; -} - -inline bool BlobShape::equal(const BlobShape &other) const -{ - if (this->dims() != other.dims()) - return false; - - for (int i = 0; i < other.dims(); i++) - { - if (sz[i] != other.sz[i]) - return false; - } - - return true; -} - -inline bool BlobShape::operator==(const BlobShape &r) const -{ - return this->equal(r); -} - -CV_EXPORTS std::ostream &operator<< (std::ostream &stream, const BlobShape &shape); - -///////////////////////////////////////////////////////////////////// - -inline int Blob::canonicalAxis(int axis) const -{ - CV_Assert(-dims() <= axis && axis < dims()); - return (axis < 0) ? axis + dims() : axis; -} - -inline int Blob::dims() const -{ - return m.dims; -} - -inline int Blob::xsize(int axis) const -{ - if (axis < -dims() || axis >= dims()) - return 1; - - return sizes()[(axis < 0) ? axis + dims() : axis]; -} - -inline int Blob::size(int axis) const -{ - CV_Assert(-dims() <= axis && axis < dims()); - return sizes()[(axis < 0) ? axis + dims() : axis]; -} - -inline size_t Blob::total(int startAxis, int endAxis) const -{ - if (startAxis < 0) - startAxis += dims(); - - if (endAxis == INT_MAX) - endAxis = dims(); - else if (endAxis < 0) - endAxis += dims(); - - CV_Assert(0 <= startAxis && startAxis <= endAxis && endAxis <= dims()); - - size_t size = 1; //fix: assume that slice isn't empty - for (int i = startAxis; i < endAxis; i++) - size *= (size_t)sizes()[i]; - - return size; -} - - -template -inline size_t Blob::offset(const Vec &pos) const -{ - size_t ofs = 0; - int i; - for (i = 0; i < std::min(n, dims()); i++) - { - CV_DbgAssert(pos[i] >= 0 && pos[i] < size(i)); - ofs = ofs * (size_t)size(i) + pos[i]; - } - for (; i < dims(); i++) - ofs *= (size_t)size(i); - return ofs; -} - -inline size_t Blob::offset(int n, int cn, int row, int col) const -{ - return offset(Vec4i(n, cn, row, col)); -} - -inline float *Blob::ptrf(int n, int cn, int row, int col) -{ - CV_Assert(type() == CV_32F); - return (float*)m.data + offset(n, cn, row, col); -} - -inline uchar *Blob::ptr(int n, int cn, int row, int col) -{ - return m.data + m.elemSize() * offset(n, cn, row, col); -} - -template -inline TFloat* Blob::ptr(int n, int cn, int row, int col) -{ - CV_Assert(type() == cv::DataDepth::value); - return (TFloat*) ptr(n, cn, row, col); -} - -inline BlobShape Blob::shape() const -{ - return BlobShape(dims(), sizes()); -} - -inline bool Blob::equalShape(const Blob &other) const -{ - if (this->dims() != other.dims()) - return false; - - for (int i = 0; i < dims(); i++) - { - if (this->sizes()[i] != other.sizes()[i]) - return false; - } - return true; -} - -inline Mat& Blob::matRef() -{ - return m; -} - -inline const Mat& Blob::matRefConst() const -{ - return m; -} - -inline UMat &Blob::umatRef() -{ - CV_Error(Error::StsNotImplemented, ""); - return *(new UMat()); -} - -inline const UMat &Blob::umatRefConst() const -{ - CV_Error(Error::StsNotImplemented, ""); - return *(new UMat()); -} - -inline Mat Blob::getPlane(int n, int cn) -{ - CV_Assert(dims() > 2); - return Mat(dims() - 2, sizes() + 2, type(), ptr(n, cn)); -} - -inline int Blob::cols() const -{ - return xsize(3); -} - -inline int Blob::rows() const -{ - return xsize(2); -} - -inline int Blob::channels() const -{ - return xsize(1); -} - -inline int Blob::num() const -{ - return xsize(0); -} - -inline Size Blob::size2() const -{ - return Size(cols(), rows()); -} - -inline int Blob::type() const -{ - return m.depth(); -} - -inline const int * Blob::sizes() const -{ - return &m.size[0]; -} - - -inline Blob &Blob::shareFrom(const Blob &blob) -{ - this->m = blob.m; - return *this; -} - -inline Blob &Blob::reshape(const BlobShape &shape) -{ - m = m.reshape(1, shape.dims(), shape.ptr()); - return *this; -} - -} -} - -#endif diff --git a/IPL/include/opencv/opencv2/dnn/dict.hpp b/IPL/include/opencv/opencv2/dnn/dict.hpp index 61db133..463d314 100644 --- a/IPL/include/opencv/opencv2/dnn/dict.hpp +++ b/IPL/include/opencv/opencv2/dnn/dict.hpp @@ -39,30 +39,34 @@ // //M*/ -#ifndef __OPENCV_DNN_DNN_DICT_HPP__ -#define __OPENCV_DNN_DNN_DICT_HPP__ - #include #include #include -namespace cv -{ -namespace dnn -{ +#include + +#ifndef OPENCV_DNN_DNN_DICT_HPP +#define OPENCV_DNN_DNN_DICT_HPP + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN //! @addtogroup dnn //! @{ /** @brief This struct stores the scalar value (or array) of one of the following type: double, cv::String or int64. * @todo Maybe int64 is useless because double type exactly stores at least 2^52 integers. */ -struct DictValue +struct CV_EXPORTS_W DictValue { DictValue(const DictValue &r); - DictValue(int p = 0) : type(Param::INT), pi(new AutoBuffer) { (*pi)[0] = p; } //!< Constructs integer scalar + DictValue(bool i) : type(Param::INT), pi(new AutoBuffer) { (*pi)[0] = i ? 1 : 0; } //!< Constructs integer scalar + DictValue(int64 i = 0) : type(Param::INT), pi(new AutoBuffer) { (*pi)[0] = i; } //!< Constructs integer scalar + CV_WRAP DictValue(int i) : type(Param::INT), pi(new AutoBuffer) { (*pi)[0] = i; } //!< Constructs integer scalar DictValue(unsigned p) : type(Param::INT), pi(new AutoBuffer) { (*pi)[0] = p; } //!< Constructs integer scalar - DictValue(double p) : type(Param::REAL), pd(new AutoBuffer) { (*pd)[0] = p; } //!< Constructs floating point scalar - DictValue(const String &p) : type(Param::STRING), ps(new AutoBuffer) { (*ps)[0] = p; } //!< Constructs string scalar + CV_WRAP DictValue(double p) : type(Param::REAL), pd(new AutoBuffer) { (*pd)[0] = p; } //!< Constructs floating point scalar + CV_WRAP DictValue(const String &s) : type(Param::STRING), ps(new AutoBuffer) { (*ps)[0] = s; } //!< Constructs string scalar + DictValue(const char *s) : type(Param::STRING), ps(new AutoBuffer) { (*ps)[0] = s; } //!< @overload template static DictValue arrayInt(TypeIter begin, int size); //!< Constructs integer array @@ -76,9 +80,13 @@ struct DictValue int size() const; - bool isInt() const; - bool isString() const; - bool isReal() const; + CV_WRAP bool isInt() const; + CV_WRAP bool isString() const; + CV_WRAP bool isReal() const; + + CV_WRAP int getIntValue(int idx = -1) const; + CV_WRAP double getRealValue(int idx = -1) const; + CV_WRAP String getStringValue(int idx = -1) const; DictValue &operator=(const DictValue &r); @@ -88,17 +96,17 @@ struct DictValue private: - int type; + Param type; union { AutoBuffer *pi; AutoBuffer *pd; AutoBuffer *ps; - void *p; + void *pv; }; - DictValue(int _type, void *_p) : type(_type), p(_p) {} + DictValue(Param _type, void *_p) : type(_type), pv(_p) {} void release(); }; @@ -111,11 +119,14 @@ class CV_EXPORTS Dict public: //! Checks a presence of the @p key in the dictionary. - bool has(const String &key); + bool has(const String &key) const; //! If the @p key in the dictionary then returns pointer to its value, else returns NULL. DictValue *ptr(const String &key); + /** @overload */ + const DictValue *ptr(const String &key) const; + //! If the @p key in the dictionary then returns its value, else an error will be generated. const DictValue &get(const String &key) const; @@ -131,10 +142,18 @@ class CV_EXPORTS Dict template const T &set(const String &key, const T &value); + //! Erase @p key from the dictionary. + void erase(const String &key); + friend std::ostream &operator<<(std::ostream &stream, const Dict &dict); + + std::map::const_iterator begin() const; + + std::map::const_iterator end() const; }; //! @} +CV__DNN_INLINE_NS_END } } diff --git a/IPL/include/opencv/opencv2/dnn/dnn.hpp b/IPL/include/opencv/opencv2/dnn/dnn.hpp index 1d1244d..3b12508 100644 --- a/IPL/include/opencv/opencv2/dnn/dnn.hpp +++ b/IPL/include/opencv/opencv2/dnn/dnn.hpp @@ -39,66 +39,214 @@ // //M*/ -#ifndef __OPENCV_DNN_DNN_HPP__ -#define __OPENCV_DNN_DNN_HPP__ +#ifndef OPENCV_DNN_DNN_HPP +#define OPENCV_DNN_DNN_HPP #include #include +#include "opencv2/core/async.hpp" + +#include "../dnn/version.hpp" + #include -#include -namespace cv -{ -namespace dnn //! This namespace is used for dnn module functionlaity. -{ +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN //! @addtogroup dnn //! @{ - /** @brief Initialize dnn module and built-in layers. - * - * This function automatically called on most of OpenCV builds, - * but you need to call it manually on some specific configurations (iOS for example). + typedef std::vector MatShape; + + /** + * @brief Enum of computation backends supported by layers. + * @see Net::setPreferableBackend */ - CV_EXPORTS void initModule(); + enum Backend + { + //! DNN_BACKEND_DEFAULT equals to DNN_BACKEND_INFERENCE_ENGINE if + //! OpenCV is built with Intel's Inference Engine library or + //! DNN_BACKEND_OPENCV otherwise. + DNN_BACKEND_DEFAULT = 0, + DNN_BACKEND_HALIDE, + DNN_BACKEND_INFERENCE_ENGINE, //!< Intel's Inference Engine computational backend + //!< @sa setInferenceEngineBackendType + DNN_BACKEND_OPENCV, + DNN_BACKEND_VKCOM, + DNN_BACKEND_CUDA, +#ifdef __OPENCV_BUILD + DNN_BACKEND_INFERENCE_ENGINE_NGRAPH = 1000000, // internal - use DNN_BACKEND_INFERENCE_ENGINE + setInferenceEngineBackendType() + DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, // internal - use DNN_BACKEND_INFERENCE_ENGINE + setInferenceEngineBackendType() +#endif + }; + + /** + * @brief Enum of target devices for computations. + * @see Net::setPreferableTarget + */ + enum Target + { + DNN_TARGET_CPU = 0, + DNN_TARGET_OPENCL, + DNN_TARGET_OPENCL_FP16, + DNN_TARGET_MYRIAD, + DNN_TARGET_VULKAN, + DNN_TARGET_FPGA, //!< FPGA device with CPU fallbacks using Inference Engine's Heterogeneous plugin. + DNN_TARGET_CUDA, + DNN_TARGET_CUDA_FP16 + }; + + CV_EXPORTS std::vector< std::pair > getAvailableBackends(); + CV_EXPORTS_W std::vector getAvailableTargets(dnn::Backend be); /** @brief This class provides all data needed to initialize layer. * - * It includes dictionary with scalar params (which can be readed by using Dict interface), + * It includes dictionary with scalar params (which can be read by using Dict interface), * blob params #blobs and optional meta information: #name and #type of layer instance. */ - struct CV_EXPORTS LayerParams : public Dict + class CV_EXPORTS LayerParams : public Dict { - std::vector blobs; //!< List of learned parameters stored as blobs. + public: + //TODO: Add ability to name blob params + std::vector blobs; //!< List of learned parameters stored as blobs. String name; //!< Name of the layer instance (optional, can be used internal purposes). String type; //!< Type name which was used for creating layer by layer factory (optional). }; + /** + * @brief Derivatives of this class encapsulates functions of certain backends. + */ + class BackendNode + { + public: + BackendNode(int backendId); + + virtual ~BackendNode(); //!< Virtual destructor to make polymorphism. + + int backendId; //!< Backend identifier. + }; + + /** + * @brief Derivatives of this class wraps cv::Mat for different backends and targets. + */ + class BackendWrapper + { + public: + BackendWrapper(int backendId, int targetId); + + /** + * @brief Wrap cv::Mat for specific backend and target. + * @param[in] targetId Target identifier. + * @param[in] m cv::Mat for wrapping. + * + * Make CPU->GPU data transfer if it's require for the target. + */ + BackendWrapper(int targetId, const cv::Mat& m); + + /** + * @brief Make wrapper for reused cv::Mat. + * @param[in] base Wrapper of cv::Mat that will be reused. + * @param[in] shape Specific shape. + * + * Initialize wrapper from another one. It'll wrap the same host CPU + * memory and mustn't allocate memory on device(i.e. GPU). It might + * has different shape. Use in case of CPU memory reusing for reuse + * associated memory on device too. + */ + BackendWrapper(const Ptr& base, const MatShape& shape); + + virtual ~BackendWrapper(); //!< Virtual destructor to make polymorphism. + + /** + * @brief Transfer data to CPU host memory. + */ + virtual void copyToHost() = 0; + + /** + * @brief Indicate that an actual data is on CPU. + */ + virtual void setHostDirty() = 0; + + int backendId; //!< Backend identifier. + int targetId; //!< Target identifier. + }; + + class CV_EXPORTS ActivationLayer; + /** @brief This interface class allows to build new Layers - are building blocks of networks. * * Each class, derived from Layer, must implement allocate() methods to declare own outputs and forward() to compute outputs. - * Also before using the new layer into networks you must register your layer by using one of @ref LayerFactoryModule "LayerFactory" macros. + * Also before using the new layer into networks you must register your layer by using one of @ref dnnLayerFactory "LayerFactory" macros. */ - struct CV_EXPORTS Layer + class CV_EXPORTS_W Layer : public Algorithm { + public: + //! List of learned parameters must be stored here to allow read them by using Net::getParam(). - std::vector blobs; + CV_PROP_RW std::vector blobs; - /** @brief Allocates internal buffers and output blobs with respect to the shape of inputs. + /** @brief Computes and sets internal parameters according to inputs, outputs and blobs. + * @deprecated Use Layer::finalize(InputArrayOfArrays, OutputArrayOfArrays) instead * @param[in] input vector of already allocated input blobs - * @param[out] output vector of output blobs, which must be allocated + * @param[out] output vector of already allocated output blobs + * + * If this method is called after network has allocated all memory for input and output blobs + * and before inferencing. + */ + CV_DEPRECATED_EXTERNAL + virtual void finalize(const std::vector &input, std::vector &output); + + /** @brief Computes and sets internal parameters according to inputs, outputs and blobs. + * @param[in] inputs vector of already allocated input blobs + * @param[out] outputs vector of already allocated output blobs * - * This method must create each produced blob according to shape of @p input blobs and internal layer params. - * If this method is called first time then @p output vector consists from empty blobs and its size determined by number of output connections. - * This method can be called multiple times if size of any @p input blob was changed. + * If this method is called after network has allocated all memory for input and output blobs + * and before inferencing. */ - virtual void allocate(const std::vector &input, std::vector &output) = 0; + CV_WRAP virtual void finalize(InputArrayOfArrays inputs, OutputArrayOfArrays outputs); /** @brief Given the @p input blobs, computes the output @p blobs. + * @deprecated Use Layer::forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) instead * @param[in] input the input blobs. * @param[out] output allocated output blobs, which will store results of the computation. + * @param[out] internals allocated internal blobs + */ + CV_DEPRECATED_EXTERNAL + virtual void forward(std::vector &input, std::vector &output, std::vector &internals); + + /** @brief Given the @p input blobs, computes the output @p blobs. + * @param[in] inputs the input blobs. + * @param[out] outputs allocated output blobs, which will store results of the computation. + * @param[out] internals allocated internal blobs */ - virtual void forward(std::vector &input, std::vector &output) = 0; + virtual void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals); + + /** @brief Given the @p input blobs, computes the output @p blobs. + * @param[in] inputs the input blobs. + * @param[out] outputs allocated output blobs, which will store results of the computation. + * @param[out] internals allocated internal blobs + */ + void forward_fallback(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals); + + /** @brief + * @overload + * @deprecated Use Layer::finalize(InputArrayOfArrays, OutputArrayOfArrays) instead + */ + CV_DEPRECATED_EXTERNAL + void finalize(const std::vector &inputs, CV_OUT std::vector &outputs); + + /** @brief + * @overload + * @deprecated Use Layer::finalize(InputArrayOfArrays, OutputArrayOfArrays) instead + */ + CV_DEPRECATED std::vector finalize(const std::vector &inputs); + + /** @brief Allocates layer and computes output. + * @deprecated This method will be removed in the future release. + */ + CV_DEPRECATED CV_WRAP void run(const std::vector &inputs, CV_OUT std::vector &outputs, + CV_IN_OUT std::vector &internals); /** @brief Returns index of input blob into the input array. * @param inputName label of input blob @@ -110,13 +258,122 @@ namespace dnn //! This namespace is used for dnn module functionlaity. /** @brief Returns index of output blob in output array. * @see inputNameToIndex() */ - virtual int outputNameToIndex(String outputName); + CV_WRAP virtual int outputNameToIndex(const String& outputName); + + /** + * @brief Ask layer if it support specific backend for doing computations. + * @param[in] backendId computation backend identifier. + * @see Backend + */ + virtual bool supportBackend(int backendId); + + /** + * @brief Returns Halide backend node. + * @param[in] inputs Input Halide buffers. + * @see BackendNode, BackendWrapper + * + * Input buffers should be exactly the same that will be used in forward invocations. + * Despite we can use Halide::ImageParam based on input shape only, + * it helps prevent some memory management issues (if something wrong, + * Halide tests will be failed). + */ + virtual Ptr initHalide(const std::vector > &inputs); + + virtual Ptr initInfEngine(const std::vector > &inputs); + + virtual Ptr initNgraph(const std::vector > &inputs, const std::vector >& nodes); + + virtual Ptr initVkCom(const std::vector > &inputs); + + /** + * @brief Returns a CUDA backend node + * + * @param context void pointer to CSLContext object + * @param inputs layer inputs + * @param outputs layer outputs + */ + virtual Ptr initCUDA( + void *context, + const std::vector>& inputs, + const std::vector>& outputs + ); + + /** + * @brief Automatic Halide scheduling based on layer hyper-parameters. + * @param[in] node Backend node with Halide functions. + * @param[in] inputs Blobs that will be used in forward invocations. + * @param[in] outputs Blobs that will be used in forward invocations. + * @param[in] targetId Target identifier + * @see BackendNode, Target + * + * Layer don't use own Halide::Func members because we can have applied + * layers fusing. In this way the fused function should be scheduled. + */ + virtual void applyHalideScheduler(Ptr& node, + const std::vector &inputs, + const std::vector &outputs, + int targetId) const; + + /** + * @brief Implement layers fusing. + * @param[in] node Backend node of bottom layer. + * @see BackendNode + * + * Actual for graph-based backends. If layer attached successfully, + * returns non-empty cv::Ptr to node of the same backend. + * Fuse only over the last function. + */ + virtual Ptr tryAttach(const Ptr& node); + + /** + * @brief Tries to attach to the layer the subsequent activation layer, i.e. do the layer fusion in a partial case. + * @param[in] layer The subsequent activation layer. + * + * Returns true if the activation layer has been attached successfully. + */ + virtual bool setActivation(const Ptr& layer); + + /** + * @brief Try to fuse current layer with a next one + * @param[in] top Next layer to be fused. + * @returns True if fusion was performed. + */ + virtual bool tryFuse(Ptr& top); - String name; //!< Name of the layer instance, can be used for logging or other internal purposes. - String type; //!< Type name which was used for creating layer by layer factory. + /** + * @brief Returns parameters of layers with channel-wise multiplication and addition. + * @param[out] scale Channel-wise multipliers. Total number of values should + * be equal to number of channels. + * @param[out] shift Channel-wise offsets. Total number of values should + * be equal to number of channels. + * + * Some layers can fuse their transformations with further layers. + * In example, convolution + batch normalization. This way base layer + * use weights from layer after it. Fused layer is skipped. + * By default, @p scale and @p shift are empty that means layer has no + * element-wise multiplications or additions. + */ + virtual void getScaleShift(Mat& scale, Mat& shift) const; + + /** + * @brief "Deattaches" all the layers, attached to particular layer. + */ + virtual void unsetAttached(); + + virtual bool getMemoryShapes(const std::vector &inputs, + const int requiredOutputs, + std::vector &outputs, + std::vector &internals) const; + virtual int64 getFLOPS(const std::vector &inputs, + const std::vector &outputs) const {CV_UNUSED(inputs); CV_UNUSED(outputs); return 0;} + + CV_PROP String name; //!< Name of the layer instance, can be used for logging or other internal purposes. + CV_PROP String type; //!< Type name which was used for creating layer by layer factory. + CV_PROP int preferableTarget; //!< prefer target for layer forwarding Layer(); - explicit Layer(const LayerParams ¶ms); //!< Initialize only #name, #type and #blobs fields. + explicit Layer(const LayerParams ¶ms); //!< Initializes only #name, #type and #blobs fields. + void setParamsFrom(const LayerParams ¶ms); //!< Initializes only #name, #type and #blobs fields. virtual ~Layer(); }; @@ -130,13 +387,53 @@ namespace dnn //! This namespace is used for dnn module functionlaity. * * This class supports reference counting of its instances, i. e. copies point to the same instance. */ - class CV_EXPORTS Net + class CV_EXPORTS_W_SIMPLE Net { public: - Net(); //!< Default constructor. - ~Net(); //!< Destructor frees the net only if there aren't references to the net anymore. + CV_WRAP Net(); //!< Default constructor. + CV_WRAP ~Net(); //!< Destructor frees the net only if there aren't references to the net anymore. + + /** @brief Create a network from Intel's Model Optimizer intermediate representation (IR). + * @param[in] xml XML configuration file with network's topology. + * @param[in] bin Binary file with trained weights. + * Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine + * backend. + */ + CV_WRAP static Net readFromModelOptimizer(const String& xml, const String& bin); + /** @brief Create a network from Intel's Model Optimizer in-memory buffers with intermediate representation (IR). + * @param[in] bufferModelConfig buffer with model's configuration. + * @param[in] bufferWeights buffer with model's trained weights. + * @returns Net object. + */ + CV_WRAP static + Net readFromModelOptimizer(const std::vector& bufferModelConfig, const std::vector& bufferWeights); + + /** @brief Create a network from Intel's Model Optimizer in-memory buffers with intermediate representation (IR). + * @param[in] bufferModelConfigPtr buffer pointer of model's configuration. + * @param[in] bufferModelConfigSize buffer size of model's configuration. + * @param[in] bufferWeightsPtr buffer pointer of model's trained weights. + * @param[in] bufferWeightsSize buffer size of model's trained weights. + * @returns Net object. + */ + static + Net readFromModelOptimizer(const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize, + const uchar* bufferWeightsPtr, size_t bufferWeightsSize); + + /** Returns true if there are no layers in the network. */ + CV_WRAP bool empty() const; + + /** @brief Dump net to String + * @returns String with structure, hyperparameters, backend, target and fusion + * Call method after setInput(). To see correct backend, target and fusion run after forward(). + */ + CV_WRAP String dump(); + /** @brief Dump net structure, hyperparameters, backend, target and fusion to dot file + * @param path path to output file with .dot extension + * @see dump() + */ + CV_WRAP void dumpToFile(const String& path); /** @brief Adds new layer to the net. * @param name unique name of the adding layer. * @param type typename of the adding layer (type must be registered in LayerRegister). @@ -152,20 +449,25 @@ namespace dnn //! This namespace is used for dnn module functionlaity. /** @brief Converts string name of the layer to the integer identifier. * @returns id of the layer, or -1 if the layer wasn't found. */ - int getLayerId(const String &layer); + CV_WRAP int getLayerId(const String &layer); + + CV_WRAP std::vector getLayerNames() const; /** @brief Container for strings and integers. */ typedef DictValue LayerId; - /** @brief Delete layer for the network (not implemented yet) */ - void deleteLayer(LayerId layer); + /** @brief Returns pointer to layer with specified id or name which the network use. */ + CV_WRAP Ptr getLayer(LayerId layerId); + + /** @brief Returns pointers to input layers of specific layer. */ + std::vector > getLayerInputs(LayerId layerId); // FIXIT: CV_WRAP /** @brief Connects output of the first layer to input of the second layer. * @param outPin descriptor of the first layer output. * @param inpPin descriptor of the second layer input. * * Descriptors have the following template <layer_name>[.input_number]: - * - the first part of the template layer_name is sting name of the added layer. + * - the first part of the template layer_name is string name of the added layer. * If this part is empty then the network input pseudo layer will be used; * - the second optional part of the template input_number * is either number of the layer input, either label one. @@ -173,54 +475,118 @@ namespace dnn //! This namespace is used for dnn module functionlaity. * * @see setNetInputs(), Layer::inputNameToIndex(), Layer::outputNameToIndex() */ - void connect(String outPin, String inpPin); + CV_WRAP void connect(String outPin, String inpPin); + /** @brief Connects #@p outNum output of the first layer to #@p inNum input of the second layer. * @param outLayerId identifier of the first layer - * @param inpLayerId identifier of the second layer * @param outNum number of the first layer output + * @param inpLayerId identifier of the second layer * @param inpNum number of the second layer input */ void connect(int outLayerId, int outNum, int inpLayerId, int inpNum); - /** @brief Sets ouputs names of the network input pseudo layer. + + /** @brief Sets outputs names of the network input pseudo layer. * * Each net always has special own the network input pseudo layer with id=0. * This layer stores the user blobs only and don't make any computations. * In fact, this layer provides the only way to pass user data into the network. * As any other layer, this layer can label its outputs and this function provides an easy way to do this. */ - void setNetInputs(const std::vector &inputBlobNames); + CV_WRAP void setInputsNames(const std::vector &inputBlobNames); - /** @brief Runs forward pass for the whole network */ - void forward(); - /** @brief Runs forward pass to compute output of layer @p toLayer */ - void forward(LayerId toLayer); - /** @brief Runs forward pass to compute output of layer @p toLayer, but computations start from @p startLayer */ - void forward(LayerId startLayer, LayerId toLayer); - /** @overload */ - void forward(const std::vector &startLayers, const std::vector &toLayers); + /** @brief Specify shape of network input. + */ + CV_WRAP void setInputShape(const String &inputName, const MatShape& shape); - //TODO: - /** @brief Optimized forward. - * @warning Not implemented yet. - * @details Makes forward only those layers which weren't changed after previous forward(). + /** @brief Runs forward pass to compute output of layer with name @p outputName. + * @param outputName name for layer which output is needed to get + * @return blob for first output of specified layer. + * @details By default runs forward pass for the whole network. */ - void forwardOpt(LayerId toLayer); - /** @overload */ - void forwardOpt(const std::vector &toLayers); + CV_WRAP Mat forward(const String& outputName = String()); - /** @brief Sets the new value for the layer output blob - * @param outputName descriptor of the updating layer output blob. - * @param blob new blob. - * @see connect(String, String) to know format of the descriptor. - * @note If updating blob is not empty then @p blob must have the same shape, - * because network reshaping is not implemented yet. + /** @brief Runs forward pass to compute output of layer with name @p outputName. + * @param outputName name for layer which output is needed to get + * @details By default runs forward pass for the whole network. + * + * This is an asynchronous version of forward(const String&). + * dnn::DNN_BACKEND_INFERENCE_ENGINE backend is required. + */ + CV_WRAP AsyncArray forwardAsync(const String& outputName = String()); + + /** @brief Runs forward pass to compute output of layer with name @p outputName. + * @param outputBlobs contains all output blobs for specified layer. + * @param outputName name for layer which output is needed to get + * @details If @p outputName is empty, runs forward pass for the whole network. + */ + CV_WRAP void forward(OutputArrayOfArrays outputBlobs, const String& outputName = String()); + + /** @brief Runs forward pass to compute outputs of layers listed in @p outBlobNames. + * @param outputBlobs contains blobs for first outputs of specified layers. + * @param outBlobNames names for layers which outputs are needed to get */ - void setBlob(String outputName, const Blob &blob); - /** @brief Returns the layer output blob. - * @param outputName the descriptor of the returning layer output blob. - * @see connect(String, String) + CV_WRAP void forward(OutputArrayOfArrays outputBlobs, + const std::vector& outBlobNames); + + /** @brief Runs forward pass to compute outputs of layers listed in @p outBlobNames. + * @param outputBlobs contains all output blobs for each layer specified in @p outBlobNames. + * @param outBlobNames names for layers which outputs are needed to get + */ + CV_WRAP_AS(forwardAndRetrieve) void forward(CV_OUT std::vector >& outputBlobs, + const std::vector& outBlobNames); + + /** + * @brief Compile Halide layers. + * @param[in] scheduler Path to YAML file with scheduling directives. + * @see setPreferableBackend + * + * Schedule layers that support Halide backend. Then compile them for + * specific target. For layers that not represented in scheduling file + * or if no manual scheduling used at all, automatic scheduling will be applied. */ - Blob getBlob(String outputName); + CV_WRAP void setHalideScheduler(const String& scheduler); + + /** + * @brief Ask network to use specific computation backend where it supported. + * @param[in] backendId backend identifier. + * @see Backend + * + * If OpenCV is compiled with Intel's Inference Engine library, DNN_BACKEND_DEFAULT + * means DNN_BACKEND_INFERENCE_ENGINE. Otherwise it equals to DNN_BACKEND_OPENCV. + */ + CV_WRAP void setPreferableBackend(int backendId); + + /** + * @brief Ask network to make computations on specific target device. + * @param[in] targetId target identifier. + * @see Target + * + * List of supported combinations backend / target: + * | | DNN_BACKEND_OPENCV | DNN_BACKEND_INFERENCE_ENGINE | DNN_BACKEND_HALIDE | DNN_BACKEND_CUDA | + * |------------------------|--------------------|------------------------------|--------------------|-------------------| + * | DNN_TARGET_CPU | + | + | + | | + * | DNN_TARGET_OPENCL | + | + | + | | + * | DNN_TARGET_OPENCL_FP16 | + | + | | | + * | DNN_TARGET_MYRIAD | | + | | | + * | DNN_TARGET_FPGA | | + | | | + * | DNN_TARGET_CUDA | | | | + | + * | DNN_TARGET_CUDA_FP16 | | | | + | + */ + CV_WRAP void setPreferableTarget(int targetId); + + /** @brief Sets the new input value for the network + * @param blob A new blob. Should have CV_32F or CV_8U depth. + * @param name A name of input layer. + * @param scalefactor An optional normalization scale. + * @param mean An optional mean subtraction values. + * @see connect(String, String) to know format of the descriptor. + * + * If scale or mean values are specified, a final input blob is computed + * as: + * \f[input(n,c,h,w) = scalefactor \times (blob(n,c,h,w) - mean_c)\f] + */ + CV_WRAP void setInput(InputArray blob, const String& name = "", + double scalefactor = 1.0, const Scalar& mean = Scalar()); /** @brief Sets the new value for the learned param of the layer. * @param layer name or id of the layer. @@ -230,47 +596,233 @@ namespace dnn //! This namespace is used for dnn module functionlaity. * @note If shape of the new blob differs from the previous shape, * then the following forward pass may fail. */ - void setParam(LayerId layer, int numParam, const Blob &blob); + CV_WRAP void setParam(LayerId layer, int numParam, const Mat &blob); + /** @brief Returns parameter blob of the layer. * @param layer name or id of the layer. * @param numParam index of the layer parameter in the Layer::blobs array. * @see Layer::blobs */ - Blob getParam(LayerId layer, int numParam = 0); + CV_WRAP Mat getParam(LayerId layer, int numParam = 0); - private: + /** @brief Returns indexes of layers with unconnected outputs. + */ + CV_WRAP std::vector getUnconnectedOutLayers() const; + + /** @brief Returns names of layers with unconnected outputs. + */ + CV_WRAP std::vector getUnconnectedOutLayersNames() const; + + /** @brief Returns input and output shapes for all layers in loaded model; + * preliminary inferencing isn't necessary. + * @param netInputShapes shapes for all input blobs in net input layer. + * @param layersIds output parameter for layer IDs. + * @param inLayersShapes output parameter for input layers shapes; + * order is the same as in layersIds + * @param outLayersShapes output parameter for output layers shapes; + * order is the same as in layersIds + */ + CV_WRAP void getLayersShapes(const std::vector& netInputShapes, + CV_OUT std::vector& layersIds, + CV_OUT std::vector >& inLayersShapes, + CV_OUT std::vector >& outLayersShapes) const; + + /** @overload */ + CV_WRAP void getLayersShapes(const MatShape& netInputShape, + CV_OUT std::vector& layersIds, + CV_OUT std::vector >& inLayersShapes, + CV_OUT std::vector >& outLayersShapes) const; + + /** @brief Returns input and output shapes for layer with specified + * id in loaded model; preliminary inferencing isn't necessary. + * @param netInputShape shape input blob in net input layer. + * @param layerId id for layer. + * @param inLayerShapes output parameter for input layers shapes; + * order is the same as in layersIds + * @param outLayerShapes output parameter for output layers shapes; + * order is the same as in layersIds + */ + void getLayerShapes(const MatShape& netInputShape, + const int layerId, + CV_OUT std::vector& inLayerShapes, + CV_OUT std::vector& outLayerShapes) const; // FIXIT: CV_WRAP + + /** @overload */ + void getLayerShapes(const std::vector& netInputShapes, + const int layerId, + CV_OUT std::vector& inLayerShapes, + CV_OUT std::vector& outLayerShapes) const; // FIXIT: CV_WRAP + + /** @brief Computes FLOP for whole loaded model with specified input shapes. + * @param netInputShapes vector of shapes for all net inputs. + * @returns computed FLOP. + */ + CV_WRAP int64 getFLOPS(const std::vector& netInputShapes) const; + /** @overload */ + CV_WRAP int64 getFLOPS(const MatShape& netInputShape) const; + /** @overload */ + CV_WRAP int64 getFLOPS(const int layerId, + const std::vector& netInputShapes) const; + /** @overload */ + CV_WRAP int64 getFLOPS(const int layerId, + const MatShape& netInputShape) const; + + /** @brief Returns list of types for layer used in model. + * @param layersTypes output parameter for returning types. + */ + CV_WRAP void getLayerTypes(CV_OUT std::vector& layersTypes) const; + + /** @brief Returns count of layers of specified type. + * @param layerType type. + * @returns count of layers + */ + CV_WRAP int getLayersCount(const String& layerType) const; + + /** @brief Computes bytes number which are required to store + * all weights and intermediate blobs for model. + * @param netInputShapes vector of shapes for all net inputs. + * @param weights output parameter to store resulting bytes for weights. + * @param blobs output parameter to store resulting bytes for intermediate blobs. + */ + void getMemoryConsumption(const std::vector& netInputShapes, + CV_OUT size_t& weights, CV_OUT size_t& blobs) const; // FIXIT: CV_WRAP + /** @overload */ + CV_WRAP void getMemoryConsumption(const MatShape& netInputShape, + CV_OUT size_t& weights, CV_OUT size_t& blobs) const; + /** @overload */ + CV_WRAP void getMemoryConsumption(const int layerId, + const std::vector& netInputShapes, + CV_OUT size_t& weights, CV_OUT size_t& blobs) const; + /** @overload */ + CV_WRAP void getMemoryConsumption(const int layerId, + const MatShape& netInputShape, + CV_OUT size_t& weights, CV_OUT size_t& blobs) const; + + /** @brief Computes bytes number which are required to store + * all weights and intermediate blobs for each layer. + * @param netInputShapes vector of shapes for all net inputs. + * @param layerIds output vector to save layer IDs. + * @param weights output parameter to store resulting bytes for weights. + * @param blobs output parameter to store resulting bytes for intermediate blobs. + */ + void getMemoryConsumption(const std::vector& netInputShapes, + CV_OUT std::vector& layerIds, + CV_OUT std::vector& weights, + CV_OUT std::vector& blobs) const; // FIXIT: CV_WRAP + /** @overload */ + void getMemoryConsumption(const MatShape& netInputShape, + CV_OUT std::vector& layerIds, + CV_OUT std::vector& weights, + CV_OUT std::vector& blobs) const; // FIXIT: CV_WRAP + + /** @brief Enables or disables layer fusion in the network. + * @param fusion true to enable the fusion, false to disable. The fusion is enabled by default. + */ + CV_WRAP void enableFusion(bool fusion); + + /** @brief Returns overall time for inference and timings (in ticks) for layers. + * Indexes in returned vector correspond to layers ids. Some layers can be fused with others, + * in this case zero ticks count will be return for that skipped layers. + * @param timings vector for tick timings for all layers. + * @return overall ticks for model inference. + */ + CV_WRAP int64 getPerfProfile(CV_OUT std::vector& timings); + private: struct Impl; Ptr impl; }; - /** @brief Small interface class for loading trained serialized models of different dnn-frameworks. */ - class Importer - { - public: - - /** @brief Adds loaded layers into the @p net and sets connetions between them. */ - virtual void populateNet(Net net) = 0; + /** @brief Reads a network model stored in Darknet model files. + * @param cfgFile path to the .cfg file with text description of the network architecture. + * @param darknetModel path to the .weights file with learned network. + * @returns Network object that ready to do forward, throw an exception in failure cases. + * @returns Net object. + */ + CV_EXPORTS_W Net readNetFromDarknet(const String &cfgFile, const String &darknetModel = String()); - virtual ~Importer(); - }; + /** @brief Reads a network model stored in Darknet model files. + * @param bufferCfg A buffer contains a content of .cfg file with text description of the network architecture. + * @param bufferModel A buffer contains a content of .weights file with learned network. + * @returns Net object. + */ + CV_EXPORTS_W Net readNetFromDarknet(const std::vector& bufferCfg, + const std::vector& bufferModel = std::vector()); - /** @brief Creates the importer of Caffe framework network. - * @param prototxt path to the .prototxt file with text description of the network architecture. - * @param caffeModel path to the .caffemodel file with learned network. - * @returns Pointer to the created importer, NULL in failure cases. + /** @brief Reads a network model stored in Darknet model files. + * @param bufferCfg A buffer contains a content of .cfg file with text description of the network architecture. + * @param lenCfg Number of bytes to read from bufferCfg + * @param bufferModel A buffer contains a content of .weights file with learned network. + * @param lenModel Number of bytes to read from bufferModel + * @returns Net object. */ - CV_EXPORTS Ptr createCaffeImporter(const String &prototxt, const String &caffeModel = String()); + CV_EXPORTS Net readNetFromDarknet(const char *bufferCfg, size_t lenCfg, + const char *bufferModel = NULL, size_t lenModel = 0); + + /** @brief Reads a network model stored in Caffe framework's format. + * @param prototxt path to the .prototxt file with text description of the network architecture. + * @param caffeModel path to the .caffemodel file with learned network. + * @returns Net object. + */ + CV_EXPORTS_W Net readNetFromCaffe(const String &prototxt, const String &caffeModel = String()); + + /** @brief Reads a network model stored in Caffe model in memory. + * @param bufferProto buffer containing the content of the .prototxt file + * @param bufferModel buffer containing the content of the .caffemodel file + * @returns Net object. + */ + CV_EXPORTS_W Net readNetFromCaffe(const std::vector& bufferProto, + const std::vector& bufferModel = std::vector()); + + /** @brief Reads a network model stored in Caffe model in memory. + * @details This is an overloaded member function, provided for convenience. + * It differs from the above function only in what argument(s) it accepts. + * @param bufferProto buffer containing the content of the .prototxt file + * @param lenProto length of bufferProto + * @param bufferModel buffer containing the content of the .caffemodel file + * @param lenModel length of bufferModel + * @returns Net object. + */ + CV_EXPORTS Net readNetFromCaffe(const char *bufferProto, size_t lenProto, + const char *bufferModel = NULL, size_t lenModel = 0); + + /** @brief Reads a network model stored in TensorFlow framework's format. + * @param model path to the .pb file with binary protobuf description of the network architecture + * @param config path to the .pbtxt file that contains text graph definition in protobuf format. + * Resulting Net object is built by text graph using weights from a binary one that + * let us make it more flexible. + * @returns Net object. + */ + CV_EXPORTS_W Net readNetFromTensorflow(const String &model, const String &config = String()); + + /** @brief Reads a network model stored in TensorFlow framework's format. + * @param bufferModel buffer containing the content of the pb file + * @param bufferConfig buffer containing the content of the pbtxt file + * @returns Net object. + */ + CV_EXPORTS_W Net readNetFromTensorflow(const std::vector& bufferModel, + const std::vector& bufferConfig = std::vector()); - /** @brief Creates the importer of Torch7 framework network. - * @param filename path to the file, dumped from Torch by using torch.save() function. + /** @brief Reads a network model stored in TensorFlow framework's format. + * @details This is an overloaded member function, provided for convenience. + * It differs from the above function only in what argument(s) it accepts. + * @param bufferModel buffer containing the content of the pb file + * @param lenModel length of bufferModel + * @param bufferConfig buffer containing the content of the pbtxt file + * @param lenConfig length of bufferConfig + */ + CV_EXPORTS Net readNetFromTensorflow(const char *bufferModel, size_t lenModel, + const char *bufferConfig = NULL, size_t lenConfig = 0); + + /** + * @brief Reads a network model stored in Torch7 framework's format. + * @param model path to the file, dumped from Torch by using torch.save() function. * @param isBinary specifies whether the network was serialized in ascii mode or binary. - * @returns Pointer to the created importer, NULL in failure cases. - * - * @warning Torch7 importer is experimental now, you need explicitly set CMake opencv_dnn_BUILD_TORCH_IMPORTER flag to compile its. + * @param evaluate specifies testing phase of network. If true, it's similar to evaluate() method in Torch. + * @returns Net object. * - * @note Ascii mode of Torch serializer is more preferable, because binary mode extensively use long type of C language, - * which has different bit-length on different systems. + * @note Ascii mode of Torch serializer is more preferable, because binary mode extensively use `long` type of C language, + * which has various bit-length on different systems. * * The loading file must contain serialized nn.Module object * with importing network. Try to eliminate a custom objects from serialazing data to avoid importing errors. @@ -284,21 +836,460 @@ namespace dnn //! This namespace is used for dnn module functionlaity. * - nn.SpatialMaxPooling, nn.SpatialAveragePooling * - nn.ReLU, nn.TanH, nn.Sigmoid * - nn.Reshape + * - nn.SoftMax, nn.LogSoftMax * * Also some equivalents of these classes from cunn, cudnn, and fbcunn may be successfully imported. */ - CV_EXPORTS Ptr createTorchImporter(const String &filename, bool isBinary = true); + CV_EXPORTS_W Net readNetFromTorch(const String &model, bool isBinary = true, bool evaluate = true); + + /** + * @brief Read deep learning network represented in one of the supported formats. + * @param[in] model Binary file contains trained weights. The following file + * extensions are expected for models from different frameworks: + * * `*.caffemodel` (Caffe, http://caffe.berkeleyvision.org/) + * * `*.pb` (TensorFlow, https://www.tensorflow.org/) + * * `*.t7` | `*.net` (Torch, http://torch.ch/) + * * `*.weights` (Darknet, https://pjreddie.com/darknet/) + * * `*.bin` (DLDT, https://software.intel.com/openvino-toolkit) + * * `*.onnx` (ONNX, https://onnx.ai/) + * @param[in] config Text file contains network configuration. It could be a + * file with the following extensions: + * * `*.prototxt` (Caffe, http://caffe.berkeleyvision.org/) + * * `*.pbtxt` (TensorFlow, https://www.tensorflow.org/) + * * `*.cfg` (Darknet, https://pjreddie.com/darknet/) + * * `*.xml` (DLDT, https://software.intel.com/openvino-toolkit) + * @param[in] framework Explicit framework name tag to determine a format. + * @returns Net object. + * + * This function automatically detects an origin framework of trained model + * and calls an appropriate function such @ref readNetFromCaffe, @ref readNetFromTensorflow, + * @ref readNetFromTorch or @ref readNetFromDarknet. An order of @p model and @p config + * arguments does not matter. + */ + CV_EXPORTS_W Net readNet(const String& model, const String& config = "", const String& framework = ""); + + /** + * @brief Read deep learning network represented in one of the supported formats. + * @details This is an overloaded member function, provided for convenience. + * It differs from the above function only in what argument(s) it accepts. + * @param[in] framework Name of origin framework. + * @param[in] bufferModel A buffer with a content of binary file with weights + * @param[in] bufferConfig A buffer with a content of text file contains network configuration. + * @returns Net object. + */ + CV_EXPORTS_W Net readNet(const String& framework, const std::vector& bufferModel, + const std::vector& bufferConfig = std::vector()); /** @brief Loads blob which was serialized as torch.Tensor object of Torch7 framework. - * @warning This function has the same limitations as createTorchImporter(). + * @warning This function has the same limitations as readNetFromTorch(). + */ + CV_EXPORTS_W Mat readTorchBlob(const String &filename, bool isBinary = true); + + /** @brief Load a network from Intel's Model Optimizer intermediate representation. + * @param[in] xml XML configuration file with network's topology. + * @param[in] bin Binary file with trained weights. + * @returns Net object. + * Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine + * backend. + */ + CV_EXPORTS_W + Net readNetFromModelOptimizer(const String &xml, const String &bin); + + /** @brief Load a network from Intel's Model Optimizer intermediate representation. + * @param[in] bufferModelConfig Buffer contains XML configuration with network's topology. + * @param[in] bufferWeights Buffer contains binary data with trained weights. + * @returns Net object. + * Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine + * backend. */ - CV_EXPORTS Blob readTorchBlob(const String &filename, bool isBinary = true); + CV_EXPORTS_W + Net readNetFromModelOptimizer(const std::vector& bufferModelConfig, const std::vector& bufferWeights); + + /** @brief Load a network from Intel's Model Optimizer intermediate representation. + * @param[in] bufferModelConfigPtr Pointer to buffer which contains XML configuration with network's topology. + * @param[in] bufferModelConfigSize Binary size of XML configuration data. + * @param[in] bufferWeightsPtr Pointer to buffer which contains binary data with trained weights. + * @param[in] bufferWeightsSize Binary size of trained weights data. + * @returns Net object. + * Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine + * backend. + */ + CV_EXPORTS + Net readNetFromModelOptimizer(const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize, + const uchar* bufferWeightsPtr, size_t bufferWeightsSize); + + /** @brief Reads a network model ONNX. + * @param onnxFile path to the .onnx file with text description of the network architecture. + * @returns Network object that ready to do forward, throw an exception in failure cases. + */ + CV_EXPORTS_W Net readNetFromONNX(const String &onnxFile); + + /** @brief Reads a network model from ONNX + * in-memory buffer. + * @param buffer memory address of the first byte of the buffer. + * @param sizeBuffer size of the buffer. + * @returns Network object that ready to do forward, throw an exception + * in failure cases. + */ + CV_EXPORTS Net readNetFromONNX(const char* buffer, size_t sizeBuffer); + + /** @brief Reads a network model from ONNX + * in-memory buffer. + * @param buffer in-memory buffer that stores the ONNX model bytes. + * @returns Network object that ready to do forward, throw an exception + * in failure cases. + */ + CV_EXPORTS_W Net readNetFromONNX(const std::vector& buffer); + + /** @brief Creates blob from .pb file. + * @param path to the .pb file with input tensor. + * @returns Mat. + */ + CV_EXPORTS_W Mat readTensorFromONNX(const String& path); + + /** @brief Creates 4-dimensional blob from image. Optionally resizes and crops @p image from center, + * subtract @p mean values, scales values by @p scalefactor, swap Blue and Red channels. + * @param image input image (with 1-, 3- or 4-channels). + * @param size spatial size for output image + * @param mean scalar with mean values which are subtracted from channels. Values are intended + * to be in (mean-R, mean-G, mean-B) order if @p image has BGR ordering and @p swapRB is true. + * @param scalefactor multiplier for @p image values. + * @param swapRB flag which indicates that swap first and last channels + * in 3-channel image is necessary. + * @param crop flag which indicates whether image will be cropped after resize or not + * @param ddepth Depth of output blob. Choose CV_32F or CV_8U. + * @details if @p crop is true, input image is resized so one side after resize is equal to corresponding + * dimension in @p size and another one is equal or larger. Then, crop from the center is performed. + * If @p crop is false, direct resize without cropping and preserving aspect ratio is performed. + * @returns 4-dimensional Mat with NCHW dimensions order. + */ + CV_EXPORTS_W Mat blobFromImage(InputArray image, double scalefactor=1.0, const Size& size = Size(), + const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false, + int ddepth=CV_32F); + + /** @brief Creates 4-dimensional blob from image. + * @details This is an overloaded member function, provided for convenience. + * It differs from the above function only in what argument(s) it accepts. + */ + CV_EXPORTS void blobFromImage(InputArray image, OutputArray blob, double scalefactor=1.0, + const Size& size = Size(), const Scalar& mean = Scalar(), + bool swapRB=false, bool crop=false, int ddepth=CV_32F); + + + /** @brief Creates 4-dimensional blob from series of images. Optionally resizes and + * crops @p images from center, subtract @p mean values, scales values by @p scalefactor, + * swap Blue and Red channels. + * @param images input images (all with 1-, 3- or 4-channels). + * @param size spatial size for output image + * @param mean scalar with mean values which are subtracted from channels. Values are intended + * to be in (mean-R, mean-G, mean-B) order if @p image has BGR ordering and @p swapRB is true. + * @param scalefactor multiplier for @p images values. + * @param swapRB flag which indicates that swap first and last channels + * in 3-channel image is necessary. + * @param crop flag which indicates whether image will be cropped after resize or not + * @param ddepth Depth of output blob. Choose CV_32F or CV_8U. + * @details if @p crop is true, input image is resized so one side after resize is equal to corresponding + * dimension in @p size and another one is equal or larger. Then, crop from the center is performed. + * If @p crop is false, direct resize without cropping and preserving aspect ratio is performed. + * @returns 4-dimensional Mat with NCHW dimensions order. + */ + CV_EXPORTS_W Mat blobFromImages(InputArrayOfArrays images, double scalefactor=1.0, + Size size = Size(), const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false, + int ddepth=CV_32F); + + /** @brief Creates 4-dimensional blob from series of images. + * @details This is an overloaded member function, provided for convenience. + * It differs from the above function only in what argument(s) it accepts. + */ + CV_EXPORTS void blobFromImages(InputArrayOfArrays images, OutputArray blob, + double scalefactor=1.0, Size size = Size(), + const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false, + int ddepth=CV_32F); + + /** @brief Parse a 4D blob and output the images it contains as 2D arrays through a simpler data structure + * (std::vector). + * @param[in] blob_ 4 dimensional array (images, channels, height, width) in floating point precision (CV_32F) from + * which you would like to extract the images. + * @param[out] images_ array of 2D Mat containing the images extracted from the blob in floating point precision + * (CV_32F). They are non normalized neither mean added. The number of returned images equals the first dimension + * of the blob (batch size). Every image has a number of channels equals to the second dimension of the blob (depth). + */ + CV_EXPORTS_W void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_); + + /** @brief Convert all weights of Caffe network to half precision floating point. + * @param src Path to origin model from Caffe framework contains single + * precision floating point weights (usually has `.caffemodel` extension). + * @param dst Path to destination model with updated weights. + * @param layersTypes Set of layers types which parameters will be converted. + * By default, converts only Convolutional and Fully-Connected layers' + * weights. + * + * @note Shrinked model has no origin float32 weights so it can't be used + * in origin Caffe framework anymore. However the structure of data + * is taken from NVidia's Caffe fork: https://github.com/NVIDIA/caffe. + * So the resulting model may be used there. + */ + CV_EXPORTS_W void shrinkCaffeModel(const String& src, const String& dst, + const std::vector& layersTypes = std::vector()); + + /** @brief Create a text representation for a binary network stored in protocol buffer format. + * @param[in] model A path to binary network. + * @param[in] output A path to output text file to be created. + * + * @note To reduce output file size, trained weights are not included. + */ + CV_EXPORTS_W void writeTextGraph(const String& model, const String& output); + + /** @brief Performs non maximum suppression given boxes and corresponding scores. + + * @param bboxes a set of bounding boxes to apply NMS. + * @param scores a set of corresponding confidences. + * @param score_threshold a threshold used to filter boxes by score. + * @param nms_threshold a threshold used in non maximum suppression. + * @param indices the kept indices of bboxes after NMS. + * @param eta a coefficient in adaptive threshold formula: \f$nms\_threshold_{i+1}=eta\cdot nms\_threshold_i\f$. + * @param top_k if `>0`, keep at most @p top_k picked indices. + */ + CV_EXPORTS void NMSBoxes(const std::vector& bboxes, const std::vector& scores, + const float score_threshold, const float nms_threshold, + CV_OUT std::vector& indices, + const float eta = 1.f, const int top_k = 0); + + CV_EXPORTS_W void NMSBoxes(const std::vector& bboxes, const std::vector& scores, + const float score_threshold, const float nms_threshold, + CV_OUT std::vector& indices, + const float eta = 1.f, const int top_k = 0); + + CV_EXPORTS_AS(NMSBoxesRotated) void NMSBoxes(const std::vector& bboxes, const std::vector& scores, + const float score_threshold, const float nms_threshold, + CV_OUT std::vector& indices, + const float eta = 1.f, const int top_k = 0); + + + /** @brief This class is presented high-level API for neural networks. + * + * Model allows to set params for preprocessing input image. + * Model creates net from file with trained weights and config, + * sets preprocessing input and runs forward pass. + */ + class CV_EXPORTS_W_SIMPLE Model : public Net + { + public: + /** + * @brief Default constructor. + */ + Model(); + + /** + * @brief Create model from deep learning network represented in one of the supported formats. + * An order of @p model and @p config arguments does not matter. + * @param[in] model Binary file contains trained weights. + * @param[in] config Text file contains network configuration. + */ + CV_WRAP Model(const String& model, const String& config = ""); + + /** + * @brief Create model from deep learning network. + * @param[in] network Net object. + */ + CV_WRAP Model(const Net& network); + + /** @brief Set input size for frame. + * @param[in] size New input size. + * @note If shape of the new blob less than 0, then frame size not change. + */ + CV_WRAP Model& setInputSize(const Size& size); + + /** @brief Set input size for frame. + * @param[in] width New input width. + * @param[in] height New input height. + * @note If shape of the new blob less than 0, + * then frame size not change. + */ + CV_WRAP Model& setInputSize(int width, int height); + + /** @brief Set mean value for frame. + * @param[in] mean Scalar with mean values which are subtracted from channels. + */ + CV_WRAP Model& setInputMean(const Scalar& mean); + + /** @brief Set scalefactor value for frame. + * @param[in] scale Multiplier for frame values. + */ + CV_WRAP Model& setInputScale(double scale); + + /** @brief Set flag crop for frame. + * @param[in] crop Flag which indicates whether image will be cropped after resize or not. + */ + CV_WRAP Model& setInputCrop(bool crop); + + /** @brief Set flag swapRB for frame. + * @param[in] swapRB Flag which indicates that swap first and last channels. + */ + CV_WRAP Model& setInputSwapRB(bool swapRB); + + /** @brief Set preprocessing parameters for frame. + * @param[in] size New input size. + * @param[in] mean Scalar with mean values which are subtracted from channels. + * @param[in] scale Multiplier for frame values. + * @param[in] swapRB Flag which indicates that swap first and last channels. + * @param[in] crop Flag which indicates whether image will be cropped after resize or not. + * blob(n, c, y, x) = scale * resize( frame(y, x, c) ) - mean(c) ) + */ + CV_WRAP void setInputParams(double scale = 1.0, const Size& size = Size(), + const Scalar& mean = Scalar(), bool swapRB = false, bool crop = false); + + /** @brief Given the @p input frame, create input blob, run net and return the output @p blobs. + * @param[in] frame The input image. + * @param[out] outs Allocated output blobs, which will store results of the computation. + */ + CV_WRAP void predict(InputArray frame, OutputArrayOfArrays outs); + + protected: + struct Impl; + Ptr impl; + }; + + /** @brief This class represents high-level API for classification models. + * + * ClassificationModel allows to set params for preprocessing input image. + * ClassificationModel creates net from file with trained weights and config, + * sets preprocessing input, runs forward pass and return top-1 prediction. + */ + class CV_EXPORTS_W_SIMPLE ClassificationModel : public Model + { + public: + /** + * @brief Create classification model from network represented in one of the supported formats. + * An order of @p model and @p config arguments does not matter. + * @param[in] model Binary file contains trained weights. + * @param[in] config Text file contains network configuration. + */ + CV_WRAP ClassificationModel(const String& model, const String& config = ""); + + /** + * @brief Create model from deep learning network. + * @param[in] network Net object. + */ + CV_WRAP ClassificationModel(const Net& network); + + /** @brief Given the @p input frame, create input blob, run net and return top-1 prediction. + * @param[in] frame The input image. + */ + std::pair classify(InputArray frame); + + /** @overload */ + CV_WRAP void classify(InputArray frame, CV_OUT int& classId, CV_OUT float& conf); + }; + + /** @brief This class represents high-level API for keypoints models + * + * KeypointsModel allows to set params for preprocessing input image. + * KeypointsModel creates net from file with trained weights and config, + * sets preprocessing input, runs forward pass and returns the x and y coordinates of each detected keypoint + */ + class CV_EXPORTS_W KeypointsModel: public Model + { + public: + /** + * @brief Create keypoints model from network represented in one of the supported formats. + * An order of @p model and @p config arguments does not matter. + * @param[in] model Binary file contains trained weights. + * @param[in] config Text file contains network configuration. + */ + CV_WRAP KeypointsModel(const String& model, const String& config = ""); + + /** + * @brief Create model from deep learning network. + * @param[in] network Net object. + */ + CV_WRAP KeypointsModel(const Net& network); + + /** @brief Given the @p input frame, create input blob, run net + * @param[in] frame The input image. + * @param thresh minimum confidence threshold to select a keypoint + * @returns a vector holding the x and y coordinates of each detected keypoint + * + */ + CV_WRAP std::vector estimate(InputArray frame, float thresh=0.5); + }; + + /** @brief This class represents high-level API for segmentation models + * + * SegmentationModel allows to set params for preprocessing input image. + * SegmentationModel creates net from file with trained weights and config, + * sets preprocessing input, runs forward pass and returns the class prediction for each pixel. + */ + class CV_EXPORTS_W SegmentationModel: public Model + { + public: + /** + * @brief Create segmentation model from network represented in one of the supported formats. + * An order of @p model and @p config arguments does not matter. + * @param[in] model Binary file contains trained weights. + * @param[in] config Text file contains network configuration. + */ + CV_WRAP SegmentationModel(const String& model, const String& config = ""); + + /** + * @brief Create model from deep learning network. + * @param[in] network Net object. + */ + CV_WRAP SegmentationModel(const Net& network); + + /** @brief Given the @p input frame, create input blob, run net + * @param[in] frame The input image. + * @param[out] mask Allocated class prediction for each pixel + */ + CV_WRAP void segment(InputArray frame, OutputArray mask); + }; + + /** @brief This class represents high-level API for object detection networks. + * + * DetectionModel allows to set params for preprocessing input image. + * DetectionModel creates net from file with trained weights and config, + * sets preprocessing input, runs forward pass and return result detections. + * For DetectionModel SSD, Faster R-CNN, YOLO topologies are supported. + */ + class CV_EXPORTS_W_SIMPLE DetectionModel : public Model + { + public: + /** + * @brief Create detection model from network represented in one of the supported formats. + * An order of @p model and @p config arguments does not matter. + * @param[in] model Binary file contains trained weights. + * @param[in] config Text file contains network configuration. + */ + CV_WRAP DetectionModel(const String& model, const String& config = ""); + + /** + * @brief Create model from deep learning network. + * @param[in] network Net object. + */ + CV_WRAP DetectionModel(const Net& network); + + /** @brief Given the @p input frame, create input blob, run net and return result detections. + * @param[in] frame The input image. + * @param[out] classIds Class indexes in result detection. + * @param[out] confidences A set of corresponding confidences. + * @param[out] boxes A set of bounding boxes. + * @param[in] confThreshold A threshold used to filter boxes by confidences. + * @param[in] nmsThreshold A threshold used in non maximum suppression. + */ + CV_WRAP void detect(InputArray frame, CV_OUT std::vector& classIds, + CV_OUT std::vector& confidences, CV_OUT std::vector& boxes, + float confThreshold = 0.5f, float nmsThreshold = 0.0f); + }; //! @} +CV__DNN_INLINE_NS_END } } #include #include -#endif /* __OPENCV_DNN_DNN_HPP__ */ +/// @deprecated Include this header directly from application. Automatic inclusion will be removed +#include + +#endif /* OPENCV_DNN_DNN_HPP */ diff --git a/IPL/include/opencv/opencv2/dnn/dnn.inl.hpp b/IPL/include/opencv/opencv2/dnn/dnn.inl.hpp index 300ae58..d6809ce 100644 --- a/IPL/include/opencv/opencv2/dnn/dnn.inl.hpp +++ b/IPL/include/opencv/opencv2/dnn/dnn.inl.hpp @@ -39,15 +39,14 @@ // //M*/ -#ifndef __OPENCV_DNN_DNN_INL_HPP__ -#define __OPENCV_DNN_DNN_INL_HPP__ +#ifndef OPENCV_DNN_DNN_INL_HPP +#define OPENCV_DNN_DNN_INL_HPP #include -namespace cv -{ -namespace dnn -{ +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN template DictValue DictValue::arrayInt(TypeIter begin, int size) @@ -86,7 +85,7 @@ inline DictValue DictValue::get(int idx) const template<> inline int64 DictValue::get(int idx) const { - CV_Assert(idx == -1 && size() == 1 || idx >= 0 && idx < size()); + CV_Assert((idx == -1 && size() == 1) || (idx >= 0 && idx < size())); idx = (idx == -1) ? 0 : idx; if (type == Param::INT) @@ -103,9 +102,13 @@ inline int64 DictValue::get(int idx) const return (int64)doubleValue; } + else if (type == Param::STRING) + { + return std::atoi((*ps)[idx].c_str()); + } else { - CV_Assert(isInt() || isReal()); + CV_Assert(isInt() || isReal() || isString()); return 0; } } @@ -116,6 +119,11 @@ inline int DictValue::get(int idx) const return (int)get(idx); } +inline int DictValue::getIntValue(int idx) const +{ + return (int)get(idx); +} + template<> inline unsigned DictValue::get(int idx) const { @@ -131,7 +139,7 @@ inline bool DictValue::get(int idx) const template<> inline double DictValue::get(int idx) const { - CV_Assert(idx == -1 && size() == 1 || idx >= 0 && idx < size()); + CV_Assert((idx == -1 && size() == 1) || (idx >= 0 && idx < size())); idx = (idx == -1) ? 0 : idx; if (type == Param::REAL) @@ -142,13 +150,22 @@ inline double DictValue::get(int idx) const { return (double)(*pi)[idx]; } + else if (type == Param::STRING) + { + return std::atof((*ps)[idx].c_str()); + } else { - CV_Assert(isReal() || isInt()); + CV_Assert(isReal() || isInt() || isString()); return 0; } } +inline double DictValue::getRealValue(int idx) const +{ + return get(idx); +} + template<> inline float DictValue::get(int idx) const { @@ -159,10 +176,16 @@ template<> inline String DictValue::get(int idx) const { CV_Assert(isString()); - CV_Assert(idx == -1 && ps->size() == 1 || idx >= 0 && idx < (int)ps->size()); + CV_Assert((idx == -1 && ps->size() == 1) || (idx >= 0 && idx < (int)ps->size())); return (*ps)[(idx == -1) ? 0 : idx]; } + +inline String DictValue::getStringValue(int idx) const +{ + return get(idx); +} + inline void DictValue::release() { switch (type) @@ -176,6 +199,16 @@ inline void DictValue::release() case Param::REAL: delete pd; break; + case Param::BOOLEAN: + case Param::MAT: + case Param::MAT_VECTOR: + case Param::ALGORITHM: + case Param::FLOAT: + case Param::UNSIGNED_INT: + case Param::UINT64: + case Param::UCHAR: + case Param::SCALAR: + break; // unhandled } } @@ -246,17 +279,22 @@ inline int DictValue::size() const { case Param::INT: return (int)pi->size(); - break; case Param::STRING: return (int)ps->size(); - break; case Param::REAL: return (int)pd->size(); - break; - default: - CV_Error(Error::StsInternal, ""); - return -1; + case Param::BOOLEAN: + case Param::MAT: + case Param::MAT_VECTOR: + case Param::ALGORITHM: + case Param::FLOAT: + case Param::UNSIGNED_INT: + case Param::UINT64: + case Param::UCHAR: + case Param::SCALAR: + break; // unhandled } + CV_Error_(Error::StsInternal, ("Unhandled type (%d)", static_cast(type))); } inline std::ostream &operator<<(std::ostream &stream, const DictValue &dictv) @@ -287,7 +325,7 @@ inline std::ostream &operator<<(std::ostream &stream, const DictValue &dictv) ///////////////////////////////////////////////////////////////// -inline bool Dict::has(const String &key) +inline bool Dict::has(const String &key) const { return dict.count(key) != 0; } @@ -298,6 +336,12 @@ inline DictValue *Dict::ptr(const String &key) return (i == dict.end()) ? NULL : &i->second; } +inline const DictValue *Dict::ptr(const String &key) const +{ + _Dict::const_iterator i = dict.find(key); + return (i == dict.end()) ? NULL : &i->second; +} + inline const DictValue &Dict::get(const String &key) const { _Dict::const_iterator i = dict.find(key); @@ -336,6 +380,11 @@ inline const T &Dict::set(const String &key, const T &value) return value; } +inline void Dict::erase(const String &key) +{ + dict.erase(key); +} + inline std::ostream &operator<<(std::ostream &stream, const Dict &dict) { Dict::_Dict::const_iterator it; @@ -345,6 +394,17 @@ inline std::ostream &operator<<(std::ostream &stream, const Dict &dict) return stream; } +inline std::map::const_iterator Dict::begin() const +{ + return dict.begin(); +} + +inline std::map::const_iterator Dict::end() const +{ + return dict.end(); +} + +CV__DNN_INLINE_NS_END } } diff --git a/IPL/include/opencv/opencv2/dnn/layer.details.hpp b/IPL/include/opencv/opencv2/dnn/layer.details.hpp new file mode 100644 index 0000000..1133da5 --- /dev/null +++ b/IPL/include/opencv/opencv2/dnn/layer.details.hpp @@ -0,0 +1,78 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +#ifndef OPENCV_DNN_LAYER_DETAILS_HPP +#define OPENCV_DNN_LAYER_DETAILS_HPP + +#include + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + +/** @brief Registers layer constructor in runtime. +* @param type string, containing type name of the layer. +* @param constructorFunc pointer to the function of type LayerRegister::Constructor, which creates the layer. +* @details This macros must be placed inside the function code. +*/ +#define CV_DNN_REGISTER_LAYER_FUNC(type, constructorFunc) \ + cv::dnn::LayerFactory::registerLayer(#type, constructorFunc); + +/** @brief Registers layer class in runtime. + * @param type string, containing type name of the layer. + * @param class C++ class, derived from Layer. + * @details This macros must be placed inside the function code. + */ +#define CV_DNN_REGISTER_LAYER_CLASS(type, class) \ + cv::dnn::LayerFactory::registerLayer(#type, cv::dnn::details::_layerDynamicRegisterer); + +/** @brief Registers layer constructor on module load time. +* @param type string, containing type name of the layer. +* @param constructorFunc pointer to the function of type LayerRegister::Constructor, which creates the layer. +* @details This macros must be placed outside the function code. +*/ +#define CV_DNN_REGISTER_LAYER_FUNC_STATIC(type, constructorFunc) \ +static cv::dnn::details::_LayerStaticRegisterer __LayerStaticRegisterer_##type(#type, constructorFunc); + +/** @brief Registers layer class on module load time. + * @param type string, containing type name of the layer. + * @param class C++ class, derived from Layer. + * @details This macros must be placed outside the function code. + */ +#define CV_DNN_REGISTER_LAYER_CLASS_STATIC(type, class) \ +Ptr __LayerStaticRegisterer_func_##type(LayerParams ¶ms) \ + { return Ptr(new class(params)); } \ +static cv::dnn::details::_LayerStaticRegisterer __LayerStaticRegisterer_##type(#type, __LayerStaticRegisterer_func_##type); + +namespace details { + +template +Ptr _layerDynamicRegisterer(LayerParams ¶ms) +{ + return Ptr(LayerClass::create(params)); +} + +//allows automatically register created layer on module load time +class _LayerStaticRegisterer +{ + String type; +public: + + _LayerStaticRegisterer(const String &layerType, LayerFactory::Constructor layerConstructor) + { + this->type = layerType; + LayerFactory::registerLayer(layerType, layerConstructor); + } + + ~_LayerStaticRegisterer() + { + LayerFactory::unregisterLayer(type); + } +}; + +} // namespace +CV__DNN_INLINE_NS_END +}} // namespace + +#endif diff --git a/IPL/include/opencv/opencv2/dnn/layer.hpp b/IPL/include/opencv/opencv2/dnn/layer.hpp index b28b6ac..8500599 100644 --- a/IPL/include/opencv/opencv2/dnn/layer.hpp +++ b/IPL/include/opencv/opencv2/dnn/layer.hpp @@ -39,18 +39,17 @@ // //M*/ -#ifndef __OPENCV_DNN_LAYER_HPP__ -#define __OPENCV_DNN_LAYER_HPP__ +#ifndef OPENCV_DNN_LAYER_HPP +#define OPENCV_DNN_LAYER_HPP #include -namespace cv -{ -namespace dnn -{ +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN //! @addtogroup dnn //! @{ //! -//! @defgroup LayerFactoryModule Utilities for new layers registration +//! @defgroup dnnLayerFactory Utilities for New Layers Registration //! @{ /** @brief %Layer factory allows to create instances of registered layers. */ @@ -59,89 +58,28 @@ class CV_EXPORTS LayerFactory public: //! Each Layer class must provide this function to the factory - typedef Ptr(*Constuctor)(LayerParams ¶ms); + typedef Ptr(*Constructor)(LayerParams ¶ms); - //! Registers the layer class with typename @p type and specified @p constructor. - static void registerLayer(const String &type, Constuctor constructor); + //! Registers the layer class with typename @p type and specified @p constructor. Thread-safe. + static void registerLayer(const String &type, Constructor constructor); - //! Unregisters registered layer with specified type name. + //! Unregisters registered layer with specified type name. Thread-safe. static void unregisterLayer(const String &type); /** @brief Creates instance of registered layer. * @param type type name of creating layer. * @param params parameters which will be used for layer initialization. + * @note Thread-safe. */ static Ptr createLayerInstance(const String &type, LayerParams& params); private: LayerFactory(); - - struct Impl; - static Ptr impl(); }; -/** @brief Registers layer constructor in runtime. -* @param type string, containing type name of the layer. -* @param constuctorFunc pointer to the function of type LayerRegister::Constuctor, which creates the layer. -* @details This macros must be placed inside the function code. -*/ -#define REG_RUNTIME_LAYER_FUNC(type, constuctorFunc) \ - LayerFactory::registerLayer(#type, constuctorFunc); - -/** @brief Registers layer class in runtime. - * @param type string, containing type name of the layer. - * @param class C++ class, derived from Layer. - * @details This macros must be placed inside the function code. - */ -#define REG_RUNTIME_LAYER_CLASS(type, class) \ - LayerFactory::registerLayer(#type, _layerDynamicRegisterer); - -/** @brief Registers layer constructor on module load time. -* @param type string, containing type name of the layer. -* @param constuctorFunc pointer to the function of type LayerRegister::Constuctor, which creates the layer. -* @details This macros must be placed outside the function code. -*/ -#define REG_STATIC_LAYER_FUNC(type, constuctorFunc) \ -static _LayerStaticRegisterer __LayerStaticRegisterer_##type(#type, constuctorFunc); - -/** @brief Registers layer class on module load time. - * @param type string, containing type name of the layer. - * @param class C++ class, derived from Layer. - * @details This macros must be placed outside the function code. - */ -#define REG_STATIC_LAYER_CLASS(type, class) \ -Ptr __LayerStaticRegisterer_func_##type(LayerParams ¶ms) \ - { return Ptr(new class(params)); } \ -static _LayerStaticRegisterer __LayerStaticRegisterer_##type(#type, __LayerStaticRegisterer_func_##type); - - //! @} //! @} - - -template -Ptr _layerDynamicRegisterer(LayerParams ¶ms) -{ - return Ptr(new LayerClass(params)); -} - -//allows automatically register created layer on module load time -struct _LayerStaticRegisterer -{ - String type; - - _LayerStaticRegisterer(const String &type, LayerFactory::Constuctor constuctor) - { - this->type = type; - LayerFactory::registerLayer(type, constuctor); - } - - ~_LayerStaticRegisterer() - { - LayerFactory::unregisterLayer(type); - } -}; - +CV__DNN_INLINE_NS_END } } #endif diff --git a/IPL/include/opencv/opencv2/dnn/shape_utils.hpp b/IPL/include/opencv/opencv2/dnn/shape_utils.hpp new file mode 100644 index 0000000..5b8d953 --- /dev/null +++ b/IPL/include/opencv/opencv2/dnn/shape_utils.hpp @@ -0,0 +1,229 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_DNN_DNN_SHAPE_UTILS_HPP +#define OPENCV_DNN_DNN_SHAPE_UTILS_HPP + +#include +#include // CV_MAX_DIM +#include +#include +#include + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + +//Slicing + +struct _Range : public cv::Range +{ + _Range(const Range &r) : cv::Range(r) {} + _Range(int start_, int size_ = 1) : cv::Range(start_, start_ + size_) {} +}; + +static inline Mat slice(const Mat &m, const _Range &r0) +{ + Range ranges[CV_MAX_DIM]; + for (int i = 1; i < m.dims; i++) + ranges[i] = Range::all(); + ranges[0] = r0; + return m(&ranges[0]); +} + +static inline Mat slice(const Mat &m, const _Range &r0, const _Range &r1) +{ + CV_Assert(m.dims >= 2); + Range ranges[CV_MAX_DIM]; + for (int i = 2; i < m.dims; i++) + ranges[i] = Range::all(); + ranges[0] = r0; + ranges[1] = r1; + return m(&ranges[0]); +} + +static inline Mat slice(const Mat &m, const _Range &r0, const _Range &r1, const _Range &r2) +{ + CV_Assert(m.dims >= 3); + Range ranges[CV_MAX_DIM]; + for (int i = 3; i < m.dims; i++) + ranges[i] = Range::all(); + ranges[0] = r0; + ranges[1] = r1; + ranges[2] = r2; + return m(&ranges[0]); +} + +static inline Mat slice(const Mat &m, const _Range &r0, const _Range &r1, const _Range &r2, const _Range &r3) +{ + CV_Assert(m.dims >= 4); + Range ranges[CV_MAX_DIM]; + for (int i = 4; i < m.dims; i++) + ranges[i] = Range::all(); + ranges[0] = r0; + ranges[1] = r1; + ranges[2] = r2; + ranges[3] = r3; + return m(&ranges[0]); +} + +static inline Mat getPlane(const Mat &m, int n, int cn) +{ + CV_Assert(m.dims > 2); + int sz[CV_MAX_DIM]; + for(int i = 2; i < m.dims; i++) + { + sz[i-2] = m.size.p[i]; + } + return Mat(m.dims - 2, sz, m.type(), (void*)m.ptr(n, cn)); +} + +static inline MatShape shape(const int* dims, const int n) +{ + MatShape shape; + shape.assign(dims, dims + n); + return shape; +} + +static inline MatShape shape(const Mat& mat) +{ + return shape(mat.size.p, mat.dims); +} + +static inline MatShape shape(const MatSize& sz) +{ + return shape(sz.p, sz.dims()); +} + +static inline MatShape shape(const UMat& mat) +{ + return shape(mat.size.p, mat.dims); +} + +#if 0 // issues with MatExpr wrapped into InputArray +static inline +MatShape shape(InputArray input) +{ + int sz[CV_MAX_DIM]; + int ndims = input.sizend(sz); + return shape(sz, ndims); +} +#endif + +namespace {inline bool is_neg(int i) { return i < 0; }} + +static inline MatShape shape(int a0, int a1=-1, int a2=-1, int a3=-1) +{ + int dims[] = {a0, a1, a2, a3}; + MatShape s = shape(dims, 4); + s.erase(std::remove_if(s.begin(), s.end(), is_neg), s.end()); + return s; +} + +static inline int total(const MatShape& shape, int start = -1, int end = -1) +{ + if (start == -1) start = 0; + if (end == -1) end = (int)shape.size(); + + if (shape.empty()) + return 0; + + int elems = 1; + CV_Assert(start <= (int)shape.size() && end <= (int)shape.size() && + start <= end); + for(int i = start; i < end; i++) + { + elems *= shape[i]; + } + return elems; +} + +static inline MatShape concat(const MatShape& a, const MatShape& b) +{ + MatShape c = a; + c.insert(c.end(), b.begin(), b.end()); + + return c; +} + +static inline std::string toString(const MatShape& shape, const String& name = "") +{ + std::ostringstream ss; + if (!name.empty()) + ss << name << ' '; + ss << '['; + for(size_t i = 0, n = shape.size(); i < n; ++i) + ss << ' ' << shape[i]; + ss << " ]"; + return ss.str(); +} +static inline void print(const MatShape& shape, const String& name = "") +{ + std::cout << toString(shape, name) << std::endl; +} +static inline std::ostream& operator<<(std::ostream &out, const MatShape& shape) +{ + out << toString(shape); + return out; +} + +inline int clamp(int ax, int dims) +{ + return ax < 0 ? ax + dims : ax; +} + +inline int clamp(int ax, const MatShape& shape) +{ + return clamp(ax, (int)shape.size()); +} + +inline Range clamp(const Range& r, int axisSize) +{ + Range clamped(std::max(r.start, 0), + r.end > 0 ? std::min(r.end, axisSize) : axisSize + r.end + 1); + CV_Assert_N(clamped.start < clamped.end, clamped.end <= axisSize); + return clamped; +} + +CV__DNN_INLINE_NS_END +} +} +#endif diff --git a/IPL/include/opencv/opencv2/dnn/utils/inference_engine.hpp b/IPL/include/opencv/opencv2/dnn/utils/inference_engine.hpp new file mode 100644 index 0000000..7db93a9 --- /dev/null +++ b/IPL/include/opencv/opencv2/dnn/utils/inference_engine.hpp @@ -0,0 +1,64 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018-2019, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. + +#ifndef OPENCV_DNN_UTILS_INF_ENGINE_HPP +#define OPENCV_DNN_UTILS_INF_ENGINE_HPP + +#include "../dnn.hpp" + +namespace cv { namespace dnn { +CV__DNN_INLINE_NS_BEGIN + + +/* Values for 'OPENCV_DNN_BACKEND_INFERENCE_ENGINE_TYPE' parameter */ +#define CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API "NN_BUILDER" +#define CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH "NGRAPH" + +/** @brief Returns Inference Engine internal backend API. + * + * See values of `CV_DNN_BACKEND_INFERENCE_ENGINE_*` macros. + * + * Default value is controlled through `OPENCV_DNN_BACKEND_INFERENCE_ENGINE_TYPE` runtime parameter (environment variable). + */ +CV_EXPORTS_W cv::String getInferenceEngineBackendType(); + +/** @brief Specify Inference Engine internal backend API. + * + * See values of `CV_DNN_BACKEND_INFERENCE_ENGINE_*` macros. + * + * @returns previous value of internal backend API + */ +CV_EXPORTS_W cv::String setInferenceEngineBackendType(const cv::String& newBackendType); + + +/** @brief Release a Myriad device (binded by OpenCV). + * + * Single Myriad device cannot be shared across multiple processes which uses + * Inference Engine's Myriad plugin. + */ +CV_EXPORTS_W void resetMyriadDevice(); + + +/* Values for 'OPENCV_DNN_IE_VPU_TYPE' parameter */ +#define CV_DNN_INFERENCE_ENGINE_VPU_TYPE_UNSPECIFIED "" +/// Intel(R) Movidius(TM) Neural Compute Stick, NCS (USB 03e7:2150), Myriad2 (https://software.intel.com/en-us/movidius-ncs) +#define CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_2 "Myriad2" +/// Intel(R) Neural Compute Stick 2, NCS2 (USB 03e7:2485), MyriadX (https://software.intel.com/ru-ru/neural-compute-stick) +#define CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X "MyriadX" + + +/** @brief Returns Inference Engine VPU type. + * + * See values of `CV_DNN_INFERENCE_ENGINE_VPU_TYPE_*` macros. + */ +CV_EXPORTS_W cv::String getInferenceEngineVPUType(); + + +CV__DNN_INLINE_NS_END +}} // namespace + +#endif // OPENCV_DNN_UTILS_INF_ENGINE_HPP diff --git a/IPL/include/opencv/opencv2/dnn/version.hpp b/IPL/include/opencv/opencv2/dnn/version.hpp new file mode 100644 index 0000000..f5c7424 --- /dev/null +++ b/IPL/include/opencv/opencv2/dnn/version.hpp @@ -0,0 +1,21 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_DNN_VERSION_HPP +#define OPENCV_DNN_VERSION_HPP + +/// Use with major OpenCV version only. +#define OPENCV_DNN_API_VERSION 20200310 + +#if !defined CV_DOXYGEN && !defined CV_STATIC_ANALYSIS && !defined CV_DNN_DONT_ADD_INLINE_NS +#define CV__DNN_INLINE_NS __CV_CAT(dnn4_v, OPENCV_DNN_API_VERSION) +#define CV__DNN_INLINE_NS_BEGIN namespace CV__DNN_INLINE_NS { +#define CV__DNN_INLINE_NS_END } +namespace cv { namespace dnn { namespace CV__DNN_INLINE_NS { } using namespace CV__DNN_INLINE_NS; }} +#else +#define CV__DNN_INLINE_NS_BEGIN +#define CV__DNN_INLINE_NS_END +#endif + +#endif // OPENCV_DNN_VERSION_HPP diff --git a/IPL/include/opencv/opencv2/dnn_superres.hpp b/IPL/include/opencv/opencv2/dnn_superres.hpp new file mode 100644 index 0000000..953f90f --- /dev/null +++ b/IPL/include/opencv/opencv2/dnn_superres.hpp @@ -0,0 +1,125 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef _OPENCV_DNN_SUPERRES_HPP_ +#define _OPENCV_DNN_SUPERRES_HPP_ + +/** @defgroup dnn_superres DNN used for super resolution + +This module contains functionality for upscaling an image via convolutional neural networks. +The following four models are implemented: + +- EDSR +- ESPCN +- FSRCNN +- LapSRN + +*/ + +#include "opencv2/core.hpp" +#include "opencv2/dnn.hpp" + +namespace cv +{ +namespace dnn_superres +{ + +//! @addtogroup dnn_superres +//! @{ + +/** @brief A class to upscale images via convolutional neural networks. +The following four models are implemented: + +- edsr +- espcn +- fsrcnn +- lapsrn + */ + +class CV_EXPORTS_W DnnSuperResImpl +{ +private: + + /** @brief Net which holds the desired neural network + */ + dnn::Net net; + + std::string alg; //algorithm + + int sc; //scale factor + + void reconstruct_YCrCb(InputArray inpImg, InputArray origImg, OutputArray outpImg, int scale); + + void preprocess_YCrCb(InputArray inpImg, OutputArray outpImg); + +public: + + /** @brief Empty constructor for python + */ + CV_WRAP static Ptr create(); + + // /** @brief Empty constructor + // */ + DnnSuperResImpl(); + + /** @brief Constructor which immediately sets the desired model + @param algo String containing one of the desired models: + - __edsr__ + - __espcn__ + - __fsrcnn__ + - __lapsrn__ + @param scale Integer specifying the upscale factor + */ + DnnSuperResImpl(const String& algo, int scale); + + /** @brief Read the model from the given path + @param path Path to the model file. + */ + CV_WRAP void readModel(const String& path); + + /** @brief Read the model from the given path + @param weights Path to the model weights file. + @param definition Path to the model definition file. + */ + void readModel(const String& weights, const String& definition); + + /** @brief Set desired model + @param algo String containing one of the desired models: + - __edsr__ + - __espcn__ + - __fsrcnn__ + - __lapsrn__ + @param scale Integer specifying the upscale factor + */ + CV_WRAP void setModel(const String& algo, int scale); + + /** @brief Upsample via neural network + @param img Image to upscale + @param result Destination upscaled image + */ + CV_WRAP void upsample(InputArray img, OutputArray result); + + /** @brief Upsample via neural network of multiple outputs + @param img Image to upscale + @param imgs_new Destination upscaled images + @param scale_factors Scaling factors of the output nodes + @param node_names Names of the output nodes in the neural network + */ + CV_WRAP void upsampleMultioutput(InputArray img, std::vector &imgs_new, const std::vector& scale_factors, const std::vector& node_names); + + /** @brief Returns the scale factor of the model: + @return Current scale factor. + */ + CV_WRAP int getScale(); + + /** @brief Returns the scale factor of the model: + @return Current algorithm. + */ + CV_WRAP String getAlgorithm(); +}; + +//! @} dnn_superres + +}} // cv::dnn_superres:: +#endif diff --git a/IPL/include/opencv/opencv2/dpm.hpp b/IPL/include/opencv/opencv2/dpm.hpp index 387a311..ab604ab 100644 --- a/IPL/include/opencv/opencv2/dpm.hpp +++ b/IPL/include/opencv/opencv2/dpm.hpp @@ -96,6 +96,9 @@ namespace cv namespace dpm { +//! @addtogroup dpm +//! @{ + /** @brief This is a C++ abstract class, it provides external user API to work with DPM. */ class CV_EXPORTS_W DPMDetector @@ -142,6 +145,8 @@ class CV_EXPORTS_W DPMDetector virtual ~DPMDetector(){} }; +//! @} + } // namespace dpm } // namespace cv diff --git a/IPL/include/opencv/opencv2/face.hpp b/IPL/include/opencv/opencv2/face.hpp index d7237bc..8c4bda3 100644 --- a/IPL/include/opencv/opencv2/face.hpp +++ b/IPL/include/opencv/opencv2/face.hpp @@ -40,7 +40,7 @@ the use of this software, even if advised of the possibility of such damage. #define __OPENCV_FACE_HPP__ /** -@defgroup face Face Recognition +@defgroup face Face Analysis - @ref face_changelog - @ref tutorial_face_main @@ -70,7 +70,7 @@ which is available since the 2.4 release. I suggest you take a look at its descr Algorithm provides the following features for all derived classes: -- So called “virtual constructor”. That is, each Algorithm derivative is registered at program +- So called "virtual constructor". That is, each Algorithm derivative is registered at program start and you can get the list of registered algorithms and create instance of a particular algorithm by its name (see Algorithm::create). If you plan to add your own algorithms, it is good practice to add a unique prefix to your algorithms to distinguish them from other @@ -112,8 +112,8 @@ Here is an example of setting a threshold for the Eigenfaces method, when creati int num_components = 10; double threshold = 10.0; // Then if you want to have a cv::FaceRecognizer with a confidence threshold, -// create the concrete implementation with the appropiate parameters: -Ptr model = createEigenFaceRecognizer(num_components, threshold); +// create the concrete implementation with the appropriate parameters: +Ptr model = EigenFaceRecognizer::create(num_components, threshold); @endcode Sometimes it's impossible to train the model, just to experiment with threshold values. Thanks to @@ -131,7 +131,7 @@ If you've set the threshold to 0.0 as we did above, then: @code // -Mat img = imread("person1/3.jpg", CV_LOAD_IMAGE_GRAYSCALE); +Mat img = imread("person1/3.jpg", IMREAD_GRAYSCALE); // Get a prediction from the model. Note: We've set a threshold of 0.0 above, // since the distance is almost always larger than 0.0, you'll get -1 as // label, which indicates, this face is unknown @@ -148,7 +148,7 @@ Since every FaceRecognizer is a Algorithm, you can use Algorithm::name to get th @code // Create a FaceRecognizer: -Ptr model = createEigenFaceRecognizer(); +Ptr model = EigenFaceRecognizer::create(); // And here's how to get its name: String name = model->name(); @endcode @@ -162,7 +162,7 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm @param src The training images, that means the faces you want to learn. The data has to be given as a vector\. @param labels The labels corresponding to the images have to be given either as a vector\ - or a + or a Mat of type CV_32SC1. The following source code snippet shows you how to learn a Fisherfaces model on a given set of images. The images are read with imread and pushed into a std::vector\. The labels of each @@ -175,14 +175,16 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm // holds images and labels vector images; vector labels; + // using Mat of type CV_32SC1 + // Mat labels(number_of_samples, 1, CV_32SC1); // images for first person - images.push_back(imread("person0/0.jpg", CV_LOAD_IMAGE_GRAYSCALE)); labels.push_back(0); - images.push_back(imread("person0/1.jpg", CV_LOAD_IMAGE_GRAYSCALE)); labels.push_back(0); - images.push_back(imread("person0/2.jpg", CV_LOAD_IMAGE_GRAYSCALE)); labels.push_back(0); + images.push_back(imread("person0/0.jpg", IMREAD_GRAYSCALE)); labels.push_back(0); + images.push_back(imread("person0/1.jpg", IMREAD_GRAYSCALE)); labels.push_back(0); + images.push_back(imread("person0/2.jpg", IMREAD_GRAYSCALE)); labels.push_back(0); // images for second person - images.push_back(imread("person1/0.jpg", CV_LOAD_IMAGE_GRAYSCALE)); labels.push_back(1); - images.push_back(imread("person1/1.jpg", CV_LOAD_IMAGE_GRAYSCALE)); labels.push_back(1); - images.push_back(imread("person1/2.jpg", CV_LOAD_IMAGE_GRAYSCALE)); labels.push_back(1); + images.push_back(imread("person1/0.jpg", IMREAD_GRAYSCALE)); labels.push_back(1); + images.push_back(imread("person1/1.jpg", IMREAD_GRAYSCALE)); labels.push_back(1); + images.push_back(imread("person1/2.jpg", IMREAD_GRAYSCALE)); labels.push_back(1); @endcode Now that you have read some images, we can create a new FaceRecognizer. In this example I'll create @@ -192,7 +194,7 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm // Create a new Fisherfaces model and retain all available Fisherfaces, // this is the most common usage of this specific FaceRecognizer: // - Ptr model = createFisherFaceRecognizer(); + Ptr model = FisherFaceRecognizer::create(); @endcode And finally train it on the given dataset (the face images and labels): @@ -211,7 +213,7 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm @param src The training images, that means the faces you want to learn. The data has to be given as a vector\. @param labels The labels corresponding to the images have to be given either as a vector\ or - a + a Mat of type CV_32SC1. This method updates a (probably trained) FaceRecognizer, but only if the algorithm supports it. The Local Binary Patterns Histograms (LBPH) recognizer (see createLBPHFaceRecognizer) can be updated. @@ -223,7 +225,7 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm // Create a new LBPH model (it can be updated) and use the default parameters, // this is the most common usage of this specific FaceRecognizer: // - Ptr model = createLBPHFaceRecognizer(); + Ptr model = LBPHFaceRecognizer::create(); // This is the common interface to train all of the available cv::FaceRecognizer // implementations: // @@ -241,7 +243,7 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm // with the new features extracted from newImages! @endcode - Calling update on an Eigenfaces model (see createEigenFaceRecognizer), which doesn't support + Calling update on an Eigenfaces model (see EigenFaceRecognizer::create), which doesn't support updating, will throw an error similar to: @code @@ -256,7 +258,7 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm CV_WRAP virtual void update(InputArrayOfArrays src, InputArray labels); /** @overload */ - CV_WRAP int predict(InputArray src) const; + CV_WRAP_AS(predict_label) int predict(InputArray src) const; /** @brief Predicts a label and associated confidence (e.g. distance) for a given input image. @@ -275,7 +277,7 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm // Do your initialization here (create the cv::FaceRecognizer model) ... // ... // Read in a sample image: - Mat img = imread("person1/3.jpg", CV_LOAD_IMAGE_GRAYSCALE); + Mat img = imread("person1/3.jpg", IMREAD_GRAYSCALE); // And get a prediction from the cv::FaceRecognizer: int predicted = model->predict(img); @endcode @@ -286,7 +288,7 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm using namespace cv; // Do your initialization here (create the cv::FaceRecognizer model) ... // ... - Mat img = imread("person1/3.jpg", CV_LOAD_IMAGE_GRAYSCALE); + Mat img = imread("person1/3.jpg", IMREAD_GRAYSCALE); // Some variables for the predicted label and associated confidence (e.g. distance): int predicted_label = -1; double predicted_confidence = 0.0; @@ -300,12 +302,11 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm /** @brief - if implemented - send all result of prediction to collector that can be used for somehow custom result handling @param src Sample image to get a prediction from. @param collector User-defined collector object that accepts all results - @param state - optional user-defined state token that should be passed back from FaceRecognizer implementation To implement this method u just have to do same internal cycle as in predict(InputArray src, CV_OUT int &label, CV_OUT double &confidence) but not try to get "best@ result, just resend it to caller side with given collector */ - CV_WRAP virtual void predict(InputArray src, Ptr collector, const int state = 0) const = 0; + CV_WRAP_AS(predict_collect) virtual void predict(InputArray src, Ptr collector) const = 0; /** @brief Saves a FaceRecognizer and its model state. @@ -319,7 +320,7 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm The suffix const means that prediction does not affect the internal model state, so the method can be safely called from within different threads. */ - CV_WRAP virtual void save(const String& filename) const; + CV_WRAP virtual void write(const String& filename) const; /** @brief Loads a FaceRecognizer and its model state. @@ -328,16 +329,19 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm FaceRecognizer::load(FileStorage& fs) in turn gets called by FaceRecognizer::load(const String& filename), to ease saving a model. */ - CV_WRAP virtual void load(const String& filename); + CV_WRAP virtual void read(const String& filename); /** @overload Saves this model to a given FileStorage. @param fs The FileStorage to store this FaceRecognizer to. */ - virtual void save(FileStorage& fs) const = 0; + virtual void write(FileStorage& fs) const CV_OVERRIDE = 0; /** @overload */ - virtual void load(const FileStorage& fs) = 0; + virtual void read(const FileNode& fn) CV_OVERRIDE = 0; + + /** @overload */ + virtual bool empty() const CV_OVERRIDE = 0; /** @brief Sets string info for the specified model's label. @@ -358,8 +362,10 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm info. */ CV_WRAP virtual std::vector getLabelsByString(const String& str) const; - /** @brief threshhold parameter accessor - required for default BestMinDist collector */ + /** @brief threshold parameter accessor - required for default BestMinDist collector */ virtual double getThreshold() const = 0; + /** @brief Sets threshold of model */ + virtual void setThreshold(double val) = 0; protected: // Stored pairs "label id - string info" std::map _labelsInfo; @@ -370,5 +376,11 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm }} #include "opencv2/face/facerec.hpp" - -#endif +#include "opencv2/face/facemark.hpp" +#include "opencv2/face/facemark_train.hpp" +#include "opencv2/face/facemarkLBF.hpp" +#include "opencv2/face/facemarkAAM.hpp" +#include "opencv2/face/face_alignment.hpp" +#include "opencv2/face/mace.hpp" + +#endif // __OPENCV_FACE_HPP__ diff --git a/IPL/include/opencv/opencv2/face/bif.hpp b/IPL/include/opencv/opencv2/face/bif.hpp new file mode 100644 index 0000000..d4f0ec5 --- /dev/null +++ b/IPL/include/opencv/opencv2/face/bif.hpp @@ -0,0 +1,83 @@ +/* +By downloading, copying, installing or using the software you agree to this license. +If you do not agree to this license, do not download, install, +copy or use the software. + + + License Agreement + For Open Source Computer Vision Library + (3-clause BSD License) + +Copyright (C) 2000-2015, Intel Corporation, all rights reserved. +Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved. +Copyright (C) 2009-2015, NVIDIA Corporation, all rights reserved. +Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. +Copyright (C) 2015, OpenCV Foundation, all rights reserved. +Copyright (C) 2015, Itseez Inc., all rights reserved. +Third party copyrights are property of their respective owners. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the names of the copyright holders nor the names of the contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +This software is provided by the copyright holders and contributors "as is" and +any express or implied warranties, including, but not limited to, the implied +warranties of merchantability and fitness for a particular purpose are disclaimed. +In no event shall copyright holders or contributors be liable for any direct, +indirect, incidental, special, exemplary, or consequential damages +(including, but not limited to, procurement of substitute goods or services; +loss of use, data, or profits; or business interruption) however caused +and on any theory of liability, whether in contract, strict liability, +or tort (including negligence or otherwise) arising in any way out of +the use of this software, even if advised of the possibility of such damage. +*/ + +#ifndef __OPENCV_BIF_HPP__ +#define __OPENCV_BIF_HPP__ + +#include "opencv2/core.hpp" + +namespace cv { +namespace face { + +/** Implementation of bio-inspired features (BIF) from the paper: + * Guo, Guodong, et al. "Human age estimation using bio-inspired features." + * Computer Vision and Pattern Recognition, 2009. CVPR 2009. + */ +class CV_EXPORTS_W BIF : public Algorithm { +public: + /** @returns The number of filter bands used for computing BIF. */ + CV_WRAP virtual int getNumBands() const = 0; + + /** @returns The number of image rotations. */ + CV_WRAP virtual int getNumRotations() const = 0; + + /** Computes features sby input image. + * @param image Input image (CV_32FC1). + * @param features Feature vector (CV_32FC1). + */ + CV_WRAP virtual void compute(InputArray image, + OutputArray features) const = 0; + + /** + * @param num_bands The number of filter bands (<=8) used for computing BIF. + * @param num_rotations The number of image rotations for computing BIF. + * @returns Object for computing BIF. + */ + CV_WRAP static Ptr create(int num_bands = 8, int num_rotations = 12); +}; + +} // namespace cv +} // namespace face + +#endif // #ifndef __OPENCV_FACEREC_HPP__ diff --git a/IPL/include/opencv/opencv2/face/face_alignment.hpp b/IPL/include/opencv/opencv2/face/face_alignment.hpp new file mode 100644 index 0000000..e96081c --- /dev/null +++ b/IPL/include/opencv/opencv2/face/face_alignment.hpp @@ -0,0 +1,60 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +#ifndef __OPENCV_FACE_ALIGNMENT_HPP__ +#define __OPENCV_FACE_ALIGNMENT_HPP__ + +#include "opencv2/face/facemark_train.hpp" + +namespace cv{ +namespace face{ +class CV_EXPORTS_W FacemarkKazemi : public Facemark +{ +public: + struct CV_EXPORTS Params + { + /** + * \brief Constructor + */ + Params(); + /// cascade_depth This stores the deapth of cascade used for training. + unsigned long cascade_depth; + /// tree_depth This stores the max height of the regression tree built. + unsigned long tree_depth; + /// num_trees_per_cascade_level This stores number of trees fit per cascade level. + unsigned long num_trees_per_cascade_level; + /// learning_rate stores the learning rate in gradient boosting, also referred as shrinkage. + float learning_rate; + /// oversampling_amount stores number of initialisations used to create training samples. + unsigned long oversampling_amount; + /// num_test_coordinates stores number of test coordinates. + unsigned long num_test_coordinates; + /// lambda stores a value to calculate probability of closeness of two coordinates. + float lambda; + /// num_test_splits stores number of random test splits generated. + unsigned long num_test_splits; + /// configfile stores the name of the file containing the values of training parameters + String configfile; + }; + static Ptr create(const FacemarkKazemi::Params ¶meters = FacemarkKazemi::Params()); + virtual ~FacemarkKazemi(); + + /** @brief This function is used to train the model using gradient boosting to get a cascade of regressors + *which can then be used to predict shape. + *@param images A vector of type cv::Mat which stores the images which are used in training samples. + *@param landmarks A vector of vectors of type cv::Point2f which stores the landmarks detected in a particular image. + *@param scale A size of type cv::Size to which all images and landmarks have to be scaled to. + *@param configfile A variable of type std::string which stores the name of the file storing parameters for training the model. + *@param modelFilename A variable of type std::string which stores the name of the trained model file that has to be saved. + *@returns A boolean value. The function returns true if the model is trained properly or false if it is not trained. + */ + virtual bool training(std::vector& images, std::vector< std::vector >& landmarks,std::string configfile,Size scale,std::string modelFilename = "face_landmarks.dat")=0; + + /// set the custom face detector + virtual bool setFaceDetector(bool(*f)(InputArray , OutputArray, void*), void* userData)=0; + /// get faces using the custom detector + virtual bool getFaces(InputArray image, OutputArray faces)=0; +}; + +}} // namespace +#endif diff --git a/IPL/include/opencv/opencv2/face/facemark.hpp b/IPL/include/opencv/opencv2/face/facemark.hpp new file mode 100644 index 0000000..86e9384 --- /dev/null +++ b/IPL/include/opencv/opencv2/face/facemark.hpp @@ -0,0 +1,95 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +/* +This file was part of GSoC Project: Facemark API for OpenCV +Final report: https://gist.github.com/kurnianggoro/74de9121e122ad0bd825176751d47ecc +Student: Laksono Kurnianggoro +Mentor: Delia Passalacqua +*/ + +#ifndef __OPENCV_FACELANDMARK_HPP__ +#define __OPENCV_FACELANDMARK_HPP__ + +/** +@defgroup face Face Analysis +- @ref tutorial_table_of_content_facemark +- The Facemark API +*/ + +#include "opencv2/core.hpp" +#include + + +namespace cv { +namespace face { + + +/** @brief Abstract base class for all facemark models + +To utilize this API in your program, please take a look at the @ref tutorial_table_of_content_facemark +### Description + +Facemark is a base class which provides universal access to any specific facemark algorithm. +Therefore, the users should declare a desired algorithm before they can use it in their application. + +Here is an example on how to declare a facemark algorithm: +@code +// Using Facemark in your code: +Ptr facemark = createFacemarkLBF(); +@endcode + +The typical pipeline for facemark detection is as follows: +- Load the trained model using Facemark::loadModel. +- Perform the fitting on an image via Facemark::fit. +*/ +class CV_EXPORTS_W Facemark : public virtual Algorithm +{ +public: + + /** @brief A function to load the trained model before the fitting process. + @param model A string represent the filename of a trained model. + + Example of usage + @code + facemark->loadModel("../data/lbf.model"); + @endcode + */ + CV_WRAP virtual void loadModel( String model ) = 0; + // virtual void saveModel(String fs)=0; + + /** @brief Detect facial landmarks from an image. + @param image Input image. + @param faces Output of the function which represent region of interest of the detected faces. + Each face is stored in cv::Rect container. + @param landmarks The detected landmark points for each faces. + + Example of usage + @code + Mat image = imread("image.jpg"); + std::vector faces; + std::vector > landmarks; + facemark->fit(image, faces, landmarks); + @endcode + */ + CV_WRAP virtual bool fit( InputArray image, + InputArray faces, + OutputArrayOfArrays landmarks) = 0; +}; /* Facemark*/ + + +//! construct an AAM facemark detector +CV_EXPORTS_W Ptr createFacemarkAAM(); + +//! construct an LBF facemark detector +CV_EXPORTS_W Ptr createFacemarkLBF(); + +//! construct a Kazemi facemark detector +CV_EXPORTS_W Ptr createFacemarkKazemi(); + + +} // face +} // cv + +#endif //__OPENCV_FACELANDMARK_HPP__ diff --git a/IPL/include/opencv/opencv2/face/facemarkAAM.hpp b/IPL/include/opencv/opencv2/face/facemarkAAM.hpp new file mode 100644 index 0000000..6f96e4a --- /dev/null +++ b/IPL/include/opencv/opencv2/face/facemarkAAM.hpp @@ -0,0 +1,162 @@ +/* +By downloading, copying, installing or using the software you agree to this +license. If you do not agree to this license, do not download, install, +copy or use the software. + License Agreement + For Open Source Computer Vision Library + (3-clause BSD License) +Copyright (C) 2013, OpenCV Foundation, all rights reserved. +Third party copyrights are property of their respective owners. +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the names of the copyright holders nor the names of the contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. +This software is provided by the copyright holders and contributors "as is" and +any express or implied warranties, including, but not limited to, the implied +warranties of merchantability and fitness for a particular purpose are +disclaimed. In no event shall copyright holders or contributors be liable for +any direct, indirect, incidental, special, exemplary, or consequential damages +(including, but not limited to, procurement of substitute goods or services; +loss of use, data, or profits; or business interruption) however caused +and on any theory of liability, whether in contract, strict liability, +or tort (including negligence or otherwise) arising in any way out of +the use of this software, even if advised of the possibility of such damage. + +This file was part of GSoC Project: Facemark API for OpenCV +Final report: https://gist.github.com/kurnianggoro/74de9121e122ad0bd825176751d47ecc +Student: Laksono Kurnianggoro +Mentor: Delia Passalacqua +*/ + +#ifndef __OPENCV_FACEMARK_AAM_HPP__ +#define __OPENCV_FACEMARK_AAM_HPP__ + +#include "opencv2/face/facemark_train.hpp" +namespace cv { +namespace face { + +//! @addtogroup face +//! @{ + +class CV_EXPORTS_W FacemarkAAM : public FacemarkTrain +{ +public: + struct CV_EXPORTS Params + { + /** + * \brief Constructor + */ + Params(); + + /** + * \brief Read parameters from file, currently unused + */ + void read(const FileNode& /*fn*/); + + /** + * \brief Read parameters from file, currently unused + */ + void write(FileStorage& /*fs*/) const; + + std::string model_filename; + int m; + int n; + int n_iter; + bool verbose; + bool save_model; + int max_m, max_n, texture_max_m; + std::vectorscales; + }; + + /** + * \brief Optional parameter for fitting process. + */ + struct CV_EXPORTS Config + { + Config( Mat rot = Mat::eye(2,2,CV_32F), + Point2f trans = Point2f(0.0f, 0.0f), + float scaling = 1.0f, + int scale_id=0 + ); + + Mat R; + Point2f t; + float scale; + int model_scale_idx; + + }; + + /** + * \brief Data container for the facemark::getData function + */ + struct CV_EXPORTS Data + { + std::vector s0; + }; + + /** + * \brief The model of AAM Algorithm + */ + struct CV_EXPORTS Model + { + std::vectorscales; + //!< defines the scales considered to build the model + + /*warping*/ + std::vector triangles; + //!< each element contains 3 values, represent index of facemarks that construct one triangle (obtained using delaunay triangulation) + + struct Texture{ + int max_m; //!< unused delete + Rect resolution; + //!< resolution of the current scale + Mat A; + //!< gray values from all face region in the dataset, projected in PCA space + Mat A0; + //!< average of gray values from all face region in the dataset + Mat AA; + //!< gray values from all erorded face region in the dataset, projected in PCA space + Mat AA0; + //!< average of gray values from all erorded face region in the dataset + + std::vector > textureIdx; + //!< index for warping of each delaunay triangle region constructed by 3 facemarks + std::vector base_shape; + //!< basic shape, normalized to be fit in an image with current detection resolution + std::vector ind1; + //!< index of pixels for mapping process to obtains the grays values of face region + std::vector ind2; + //!< index of pixels for mapping process to obtains the grays values of eroded face region + }; + std::vector textures; + //!< a container to holds the texture data for each scale of fitting + + /*shape*/ + std::vector s0; + //!< the basic shape obtained from training dataset + Mat S,Q; + //!< the encoded shapes from training data + + }; + + //! overload with additional Config structures + virtual bool fitConfig( InputArray image, InputArray roi, OutputArrayOfArrays _landmarks, const std::vector &runtime_params ) = 0; + + + //! initializer + static Ptr create(const FacemarkAAM::Params ¶meters = FacemarkAAM::Params() ); + virtual ~FacemarkAAM() {} + +}; /* AAM */ + +//! @} + +} /* namespace face */ +} /* namespace cv */ +#endif diff --git a/IPL/include/opencv/opencv2/face/facemarkLBF.hpp b/IPL/include/opencv/opencv2/face/facemarkLBF.hpp new file mode 100644 index 0000000..7f4cd32 --- /dev/null +++ b/IPL/include/opencv/opencv2/face/facemarkLBF.hpp @@ -0,0 +1,120 @@ +/* +By downloading, copying, installing or using the software you agree to this +license. If you do not agree to this license, do not download, install, +copy or use the software. + License Agreement + For Open Source Computer Vision Library + (3-clause BSD License) +Copyright (C) 2013, OpenCV Foundation, all rights reserved. +Third party copyrights are property of their respective owners. +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the names of the copyright holders nor the names of the contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. +This software is provided by the copyright holders and contributors "as is" and +any express or implied warranties, including, but not limited to, the implied +warranties of merchantability and fitness for a particular purpose are +disclaimed. In no event shall copyright holders or contributors be liable for +any direct, indirect, incidental, special, exemplary, or consequential damages +(including, but not limited to, procurement of substitute goods or services; +loss of use, data, or profits; or business interruption) however caused +and on any theory of liability, whether in contract, strict liability, +or tort (including negligence or otherwise) arising in any way out of +the use of this software, even if advised of the possibility of such damage. + +This file was part of GSoC Project: Facemark API for OpenCV +Final report: https://gist.github.com/kurnianggoro/74de9121e122ad0bd825176751d47ecc +Student: Laksono Kurnianggoro +Mentor: Delia Passalacqua +*/ + +#ifndef __OPENCV_FACEMARK_LBF_HPP__ +#define __OPENCV_FACEMARK_LBF_HPP__ + +#include "opencv2/face/facemark_train.hpp" + +namespace cv { +namespace face { + +//! @addtogroup face +//! @{ + +class CV_EXPORTS_W FacemarkLBF : public FacemarkTrain +{ +public: + struct CV_EXPORTS Params + { + /** + * \brief Constructor + */ + Params(); + + double shape_offset; + //!< offset for the loaded face landmark points + String cascade_face; + //!< filename of the face detector model + bool verbose; + //!< show the training print-out + + int n_landmarks; + //!< number of landmark points + int initShape_n; + //!< multiplier for augment the training data + + int stages_n; + //!< number of refinement stages + int tree_n; + //!< number of tree in the model for each landmark point refinement + int tree_depth; + //!< the depth of decision tree, defines the size of feature + double bagging_overlap; + //!< overlap ratio for training the LBF feature + + std::string model_filename; + //!< filename where the trained model will be saved + bool save_model; //!< flag to save the trained model or not + unsigned int seed; //!< seed for shuffling the training data + + std::vector feats_m; + std::vector radius_m; + std::vector pupils[2]; + //!< index of facemark points on pupils of left and right eye + + Rect detectROI; + + void read(const FileNode& /*fn*/); + void write(FileStorage& /*fs*/) const; + + }; + + class BBox { + public: + BBox(); + ~BBox(); + BBox(double x, double y, double w, double h); + + Mat project(const Mat &shape) const; + Mat reproject(const Mat &shape) const; + + double x, y; + double x_center, y_center; + double x_scale, y_scale; + double width, height; + }; + + static Ptr create(const FacemarkLBF::Params ¶meters = FacemarkLBF::Params() ); + virtual ~FacemarkLBF(){}; +}; /* LBF */ + +//! @} + +} /* namespace face */ +}/* namespace cv */ + +#endif diff --git a/IPL/include/opencv/opencv2/face/facemark_train.hpp b/IPL/include/opencv/opencv2/face/facemark_train.hpp new file mode 100644 index 0000000..d6e27e9 --- /dev/null +++ b/IPL/include/opencv/opencv2/face/facemark_train.hpp @@ -0,0 +1,392 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +/* +This file was part of GSoC Project: Facemark API for OpenCV +Final report: https://gist.github.com/kurnianggoro/74de9121e122ad0bd825176751d47ecc +Student: Laksono Kurnianggoro +Mentor: Delia Passalacqua +*/ + +#ifndef __OPENCV_FACELANDMARKTRAIN_HPP__ +#define __OPENCV_FACELANDMARKTRAIN_HPP__ + +/** +@defgroup face Face Analysis +- @ref tutorial_table_of_content_facemark +- The Facemark API +*/ + +#include "opencv2/face/facemark.hpp" +#include "opencv2/objdetect.hpp" +#include +#include + + +namespace cv { +namespace face { + +//! @addtogroup face +//! @{ + +typedef bool(*FN_FaceDetector)(InputArray, OutputArray, void* userData); + +struct CParams{ + String cascade; //!< the face detector + double scaleFactor; //!< Parameter specifying how much the image size is reduced at each image scale. + int minNeighbors; //!< Parameter specifying how many neighbors each candidate rectangle should have to retain it. + Size minSize; //!< Minimum possible object size. + Size maxSize; //!< Maximum possible object size. + + CV_EXPORTS CParams( + String cascade_model, + double sf = 1.1, + int minN = 3, + Size minSz = Size(30, 30), + Size maxSz = Size() + ); + + CascadeClassifier face_cascade; +}; + +/** @brief Default face detector +This function is mainly utilized by the implementation of a Facemark Algorithm. +End users are advised to use function Facemark::getFaces which can be manually defined +and circumvented to the algorithm by Facemark::setFaceDetector. + +@param image The input image to be processed. +@param faces Output of the function which represent region of interest of the detected faces. +Each face is stored in cv::Rect container. +@param params detector parameters + +Example of usage +@code +std::vector faces; +CParams params("haarcascade_frontalface_alt.xml"); +cv::face::getFaces(frame, faces, ¶ms); +for(int j=0;j images_train; +std::vector landmarks_train; +loadDatasetList(imageFiles,ptsFiles,images_train,landmarks_train); +@endcode +*/ +CV_EXPORTS_W bool loadDatasetList(String imageList, + String annotationList, + std::vector & images, + std::vector & annotations); + +/** @brief A utility to load facial landmark dataset from a single file. + +@param filename The filename of a file that contains the dataset information. +Each line contains the filename of an image followed by +pairs of x and y values of facial landmarks points separated by a space. +Example +@code +/home/user/ibug/image_003_1.jpg 336.820955 240.864510 334.238298 260.922709 335.266918 ... +/home/user/ibug/image_005_1.jpg 376.158428 230.845712 376.736984 254.924635 383.265403 ... +@endcode +@param images A vector where each element represent the filename of image in the dataset. +Images are not loaded by default to save the memory. +@param facePoints The loaded landmark points for all training data. +@param delim Delimiter between each element, the default value is a whitespace. +@param offset An offset value to adjust the loaded points. + +Example of usage +@code +cv::String imageFiles = "../data/images_train.txt"; +cv::String ptsFiles = "../data/points_train.txt"; +std::vector images; +std::vector > facePoints; +loadTrainingData(imageFiles, ptsFiles, images, facePoints, 0.0f); +@endcode +*/ +CV_EXPORTS_W bool loadTrainingData( String filename , std::vector & images, + OutputArray facePoints, + char delim = ' ', float offset = 0.0f); + +/** @brief A utility to load facial landmark information from the dataset. + +@param imageList A file contains the list of image filenames in the training dataset. +@param groundTruth A file contains the list of filenames +where the landmarks points information are stored. +The content in each file should follow the standard format (see face::loadFacePoints). +@param images A vector where each element represent the filename of image in the dataset. +Images are not loaded by default to save the memory. +@param facePoints The loaded landmark points for all training data. +@param offset An offset value to adjust the loaded points. + +Example of usage +@code +cv::String imageFiles = "../data/images_train.txt"; +cv::String ptsFiles = "../data/points_train.txt"; +std::vector images; +std::vector > facePoints; +loadTrainingData(imageFiles, ptsFiles, images, facePoints, 0.0f); +@endcode + +example of content in the images_train.txt +@code +/home/user/ibug/image_003_1.jpg +/home/user/ibug/image_004_1.jpg +/home/user/ibug/image_005_1.jpg +/home/user/ibug/image_006.jpg +@endcode + +example of content in the points_train.txt +@code +/home/user/ibug/image_003_1.pts +/home/user/ibug/image_004_1.pts +/home/user/ibug/image_005_1.pts +/home/user/ibug/image_006.pts +@endcode +*/ +CV_EXPORTS_W bool loadTrainingData( String imageList, String groundTruth, + std::vector & images, + OutputArray facePoints, + float offset = 0.0f); + +/** @brief This function extracts the data for training from .txt files which contains the corresponding image name and landmarks. +*The first file in each file should give the path of the image whose +*landmarks are being described in the file. Then in the subsequent +*lines there should be coordinates of the landmarks in the image +*i.e each line should be of the form x,y +*where x represents the x coordinate of the landmark and y represents +*the y coordinate of the landmark. +* +*For reference you can see the files as provided in the +*HELEN dataset +* +* @param filename A vector of type cv::String containing name of the .txt files. +* @param trainlandmarks A vector of type cv::Point2f that would store shape or landmarks of all images. +* @param trainimages A vector of type cv::String which stores the name of images whose landmarks are tracked +* @returns A boolean value. It returns true when it reads the data successfully and false otherwise +*/ +CV_EXPORTS_W bool loadTrainingData(std::vector filename,std::vector< std::vector > + &trainlandmarks,std::vector & trainimages); + +/** @brief A utility to load facial landmark information from a given file. + +@param filename The filename of file contains the facial landmarks data. +@param points The loaded facial landmark points. +@param offset An offset value to adjust the loaded points. + +Example of usage +@code +std::vector points; +face::loadFacePoints("filename.txt", points, 0.0f); +@endcode + +The annotation file should follow the default format which is +@code +version: 1 +n_points: 68 +{ +212.716603 499.771793 +230.232816 566.290071 +... +} +@endcode +where n_points is the number of points considered +and each point is represented as its position in x and y. +*/ +CV_EXPORTS_W bool loadFacePoints( String filename, OutputArray points, + float offset = 0.0f); + +/** @brief Utility to draw the detected facial landmark points + +@param image The input image to be processed. +@param points Contains the data of points which will be drawn. +@param color The color of points in BGR format represented by cv::Scalar. + +Example of usage +@code +std::vector faces; +std::vector > landmarks; +facemark->getFaces(img, faces); +facemark->fit(img, faces, landmarks); +for(int j=0;j facemark = FacemarkLBF::create(); +@endcode + + +The typical pipeline for facemark detection is listed as follows: +- (Non-mandatory) Set a user defined face detection using FacemarkTrain::setFaceDetector. + The facemark algorithms are designed to fit the facial points into a face. + Therefore, the face information should be provided to the facemark algorithm. + Some algorithms might provides a default face recognition function. + However, the users might prefer to use their own face detector to obtains the best possible detection result. +- (Non-mandatory) Training the model for a specific algorithm using FacemarkTrain::training. + In this case, the model should be automatically saved by the algorithm. + If the user already have a trained model, then this part can be omitted. +- Load the trained model using Facemark::loadModel. +- Perform the fitting via the Facemark::fit. +*/ +class CV_EXPORTS_W FacemarkTrain : public Facemark +{ +public: + /** @brief Add one training sample to the trainer. + + @param image Input image. + @param landmarks The ground-truth of facial landmarks points corresponds to the image. + + Example of usage + @code + String imageFiles = "../data/images_train.txt"; + String ptsFiles = "../data/points_train.txt"; + std::vector images_train; + std::vector landmarks_train; + + // load the list of dataset: image paths and landmark file paths + loadDatasetList(imageFiles,ptsFiles,images_train,landmarks_train); + + Mat image; + std::vector facial_points; + for(size_t i=0;iaddTrainingSample(image, facial_points); + } + @endcode + + The contents in the training files should follows the standard format. + Here are examples for the contents in these files. + example of content in the images_train.txt + @code + /home/user/ibug/image_003_1.jpg + /home/user/ibug/image_004_1.jpg + /home/user/ibug/image_005_1.jpg + /home/user/ibug/image_006.jpg + @endcode + + example of content in the points_train.txt + @code + /home/user/ibug/image_003_1.pts + /home/user/ibug/image_004_1.pts + /home/user/ibug/image_005_1.pts + /home/user/ibug/image_006.pts + @endcode + + */ + virtual bool addTrainingSample(InputArray image, InputArray landmarks)=0; + + /** @brief Trains a Facemark algorithm using the given dataset. + Before the training process, training samples should be added to the trainer + using face::addTrainingSample function. + + @param parameters Optional extra parameters (algorithm dependent). + + Example of usage + @code + FacemarkLBF::Params params; + params.model_filename = "ibug68.model"; // filename to save the trained model + Ptr facemark = FacemarkLBF::create(params); + + // add training samples (see Facemark::addTrainingSample) + + facemark->training(); + @endcode + */ + + virtual void training(void* parameters=0)=0; + + /** @brief Set a user defined face detector for the Facemark algorithm. + @param detector The user defined face detector function + @param userData Detector parameters + + Example of usage + @code + MyDetectorParameters detectorParameters(...); + facemark->setFaceDetector(myDetector, &detectorParameters); + @endcode + + Example of a user defined face detector + @code + bool myDetector( InputArray image, OutputArray faces, void* userData) + { + MyDetectorParameters* params = (MyDetectorParameters*)userData; + // -------- do something -------- + } + @endcode + + TODO Lifetime of detector parameters is uncontrolled. Rework interface design to "Ptr". + */ + virtual bool setFaceDetector(FN_FaceDetector detector, void* userData = 0)=0; + + /** @brief Detect faces from a given image using default or user defined face detector. + Some Algorithm might not provide a default face detector. + + @param image Input image. + @param faces Output of the function which represent region of interest of the detected faces. Each face is stored in cv::Rect container. + + Example of usage + @code + std::vector faces; + facemark->getFaces(img, faces); + for(int j=0;jExample of usage + @code + Ptr facemark = FacemarkAAM::create(); + facemark->loadModel("AAM.yml"); + + FacemarkAAM::Data data; + facemark->getData(&data); + std::vector s0 = data.s0; + + cout< getProjections() const = 0; - CV_WRAP virtual cv::Mat getLabels() const = 0; - CV_WRAP virtual cv::Mat getEigenValues() const = 0; - CV_WRAP virtual cv::Mat getEigenVectors() const = 0; - CV_WRAP virtual cv::Mat getMean() const = 0; + CV_WRAP void setThreshold(double val) CV_OVERRIDE; + CV_WRAP std::vector getProjections() const; + CV_WRAP cv::Mat getLabels() const; + CV_WRAP cv::Mat getEigenValues() const; + CV_WRAP cv::Mat getEigenVectors() const; + CV_WRAP cv::Mat getMean() const; + + virtual void read(const FileNode& fn) CV_OVERRIDE; + virtual void write(FileStorage& fs) const CV_OVERRIDE; + virtual bool empty() const CV_OVERRIDE; + + using FaceRecognizer::read; + using FaceRecognizer::write; + +protected: + int _num_components; + double _threshold; + std::vector _projections; + Mat _labels; + Mat _eigenvectors; + Mat _eigenvalues; + Mat _mean; +}; + +class CV_EXPORTS_W EigenFaceRecognizer : public BasicFaceRecognizer +{ +public: + /** + @param num_components The number of components (read: Eigenfaces) kept for this Principal + Component Analysis. As a hint: There's no rule how many components (read: Eigenfaces) should be + kept for good reconstruction capabilities. It is based on your input data, so experiment with the + number. Keeping 80 components should almost always be sufficient. + @param threshold The threshold applied in the prediction. + + ### Notes: + + - Training and prediction must be done on grayscale images, use cvtColor to convert between the + color spaces. + - **THE EIGENFACES METHOD MAKES THE ASSUMPTION, THAT THE TRAINING AND TEST IMAGES ARE OF EQUAL + SIZE.** (caps-lock, because I got so many mails asking for this). You have to make sure your + input data has the correct shape, else a meaningful exception is thrown. Use resize to resize + the images. + - This model does not support updating. + + ### Model internal data: + + - num_components see EigenFaceRecognizer::create. + - threshold see EigenFaceRecognizer::create. + - eigenvalues The eigenvalues for this Principal Component Analysis (ordered descending). + - eigenvectors The eigenvectors for this Principal Component Analysis (ordered by their + eigenvalue). + - mean The sample mean calculated from the training data. + - projections The projections of the training data. + - labels The threshold applied in the prediction. If the distance to the nearest neighbor is + larger than the threshold, this method returns -1. + */ + CV_WRAP static Ptr create(int num_components = 0, double threshold = DBL_MAX); +}; + +class CV_EXPORTS_W FisherFaceRecognizer : public BasicFaceRecognizer +{ +public: + /** + @param num_components The number of components (read: Fisherfaces) kept for this Linear + Discriminant Analysis with the Fisherfaces criterion. It's useful to keep all components, that + means the number of your classes c (read: subjects, persons you want to recognize). If you leave + this at the default (0) or set it to a value less-equal 0 or greater (c-1), it will be set to the + correct number (c-1) automatically. + @param threshold The threshold applied in the prediction. If the distance to the nearest neighbor + is larger than the threshold, this method returns -1. + + ### Notes: + + - Training and prediction must be done on grayscale images, use cvtColor to convert between the + color spaces. + - **THE FISHERFACES METHOD MAKES THE ASSUMPTION, THAT THE TRAINING AND TEST IMAGES ARE OF EQUAL + SIZE.** (caps-lock, because I got so many mails asking for this). You have to make sure your + input data has the correct shape, else a meaningful exception is thrown. Use resize to resize + the images. + - This model does not support updating. + + ### Model internal data: + + - num_components see FisherFaceRecognizer::create. + - threshold see FisherFaceRecognizer::create. + - eigenvalues The eigenvalues for this Linear Discriminant Analysis (ordered descending). + - eigenvectors The eigenvectors for this Linear Discriminant Analysis (ordered by their + eigenvalue). + - mean The sample mean calculated from the training data. + - projections The projections of the training data. + - labels The labels corresponding to the projections. + */ + CV_WRAP static Ptr create(int num_components = 0, double threshold = DBL_MAX); }; -/** -@param num_components The number of components (read: Eigenfaces) kept for this Principal -Component Analysis. As a hint: There's no rule how many components (read: Eigenfaces) should be -kept for good reconstruction capabilities. It is based on your input data, so experiment with the -number. Keeping 80 components should almost always be sufficient. -@param threshold The threshold applied in the prediction. - -### Notes: - -- Training and prediction must be done on grayscale images, use cvtColor to convert between the - color spaces. -- **THE EIGENFACES METHOD MAKES THE ASSUMPTION, THAT THE TRAINING AND TEST IMAGES ARE OF EQUAL - SIZE.** (caps-lock, because I got so many mails asking for this). You have to make sure your - input data has the correct shape, else a meaningful exception is thrown. Use resize to resize - the images. -- This model does not support updating. - -### Model internal data: - -- num_components see createEigenFaceRecognizer. -- threshold see createEigenFaceRecognizer. -- eigenvalues The eigenvalues for this Principal Component Analysis (ordered descending). -- eigenvectors The eigenvectors for this Principal Component Analysis (ordered by their - eigenvalue). -- mean The sample mean calculated from the training data. -- projections The projections of the training data. -- labels The threshold applied in the prediction. If the distance to the nearest neighbor is - larger than the threshold, this method returns -1. - */ -CV_EXPORTS_W Ptr createEigenFaceRecognizer(int num_components = 0, double threshold = DBL_MAX); - -/** -@param num_components The number of components (read: Fisherfaces) kept for this Linear -Discriminant Analysis with the Fisherfaces criterion. It's useful to keep all components, that -means the number of your classes c (read: subjects, persons you want to recognize). If you leave -this at the default (0) or set it to a value less-equal 0 or greater (c-1), it will be set to the -correct number (c-1) automatically. -@param threshold The threshold applied in the prediction. If the distance to the nearest neighbor -is larger than the threshold, this method returns -1. - -### Notes: - -- Training and prediction must be done on grayscale images, use cvtColor to convert between the - color spaces. -- **THE FISHERFACES METHOD MAKES THE ASSUMPTION, THAT THE TRAINING AND TEST IMAGES ARE OF EQUAL - SIZE.** (caps-lock, because I got so many mails asking for this). You have to make sure your - input data has the correct shape, else a meaningful exception is thrown. Use resize to resize - the images. -- This model does not support updating. - -### Model internal data: - -- num_components see createFisherFaceRecognizer. -- threshold see createFisherFaceRecognizer. -- eigenvalues The eigenvalues for this Linear Discriminant Analysis (ordered descending). -- eigenvectors The eigenvectors for this Linear Discriminant Analysis (ordered by their - eigenvalue). -- mean The sample mean calculated from the training data. -- projections The projections of the training data. -- labels The labels corresponding to the projections. - */ -CV_EXPORTS_W Ptr createFisherFaceRecognizer(int num_components = 0, double threshold = DBL_MAX); class CV_EXPORTS_W LBPHFaceRecognizer : public FaceRecognizer { @@ -118,46 +143,46 @@ class CV_EXPORTS_W LBPHFaceRecognizer : public FaceRecognizer /** @copybrief getNeighbors @see getNeighbors */ CV_WRAP virtual void setNeighbors(int val) = 0; /** @see setThreshold */ - CV_WRAP virtual double getThreshold() const = 0; + CV_WRAP virtual double getThreshold() const CV_OVERRIDE = 0; /** @copybrief getThreshold @see getThreshold */ - CV_WRAP virtual void setThreshold(double val) = 0; + CV_WRAP virtual void setThreshold(double val) CV_OVERRIDE = 0; CV_WRAP virtual std::vector getHistograms() const = 0; CV_WRAP virtual cv::Mat getLabels() const = 0; -}; -/** -@param radius The radius used for building the Circular Local Binary Pattern. The greater the -radius, the -@param neighbors The number of sample points to build a Circular Local Binary Pattern from. An -appropriate value is to use `8` sample points. Keep in mind: the more sample points you include, -the higher the computational cost. -@param grid_x The number of cells in the horizontal direction, 8 is a common value used in -publications. The more cells, the finer the grid, the higher the dimensionality of the resulting -feature vector. -@param grid_y The number of cells in the vertical direction, 8 is a common value used in -publications. The more cells, the finer the grid, the higher the dimensionality of the resulting -feature vector. -@param threshold The threshold applied in the prediction. If the distance to the nearest neighbor -is larger than the threshold, this method returns -1. - -### Notes: - -- The Circular Local Binary Patterns (used in training and prediction) expect the data given as - grayscale images, use cvtColor to convert between the color spaces. -- This model supports updating. - -### Model internal data: - -- radius see createLBPHFaceRecognizer. -- neighbors see createLBPHFaceRecognizer. -- grid_x see createLBPHFaceRecognizer. -- grid_y see createLBPHFaceRecognizer. -- threshold see createLBPHFaceRecognizer. -- histograms Local Binary Patterns Histograms calculated from the given training data (empty if - none was given). -- labels Labels corresponding to the calculated Local Binary Patterns Histograms. - */ -CV_EXPORTS_W Ptr createLBPHFaceRecognizer(int radius=1, int neighbors=8, int grid_x=8, int grid_y=8, double threshold = DBL_MAX); + /** + @param radius The radius used for building the Circular Local Binary Pattern. The greater the + radius, the smoother the image but more spatial information you can get. + @param neighbors The number of sample points to build a Circular Local Binary Pattern from. An + appropriate value is to use `8` sample points. Keep in mind: the more sample points you include, + the higher the computational cost. + @param grid_x The number of cells in the horizontal direction, 8 is a common value used in + publications. The more cells, the finer the grid, the higher the dimensionality of the resulting + feature vector. + @param grid_y The number of cells in the vertical direction, 8 is a common value used in + publications. The more cells, the finer the grid, the higher the dimensionality of the resulting + feature vector. + @param threshold The threshold applied in the prediction. If the distance to the nearest neighbor + is larger than the threshold, this method returns -1. + + ### Notes: + + - The Circular Local Binary Patterns (used in training and prediction) expect the data given as + grayscale images, use cvtColor to convert between the color spaces. + - This model supports updating. + + ### Model internal data: + + - radius see LBPHFaceRecognizer::create. + - neighbors see LBPHFaceRecognizer::create. + - grid_x see LLBPHFaceRecognizer::create. + - grid_y see LBPHFaceRecognizer::create. + - threshold see LBPHFaceRecognizer::create. + - histograms Local Binary Patterns Histograms calculated from the given training data (empty if + none was given). + - labels Labels corresponding to the calculated Local Binary Patterns Histograms. + */ + CV_WRAP static Ptr create(int radius=1, int neighbors=8, int grid_x=8, int grid_y=8, double threshold = DBL_MAX); +}; //! @} diff --git a/IPL/include/opencv/opencv2/face/mace.hpp b/IPL/include/opencv/opencv2/face/mace.hpp new file mode 100644 index 0000000..ba3ec86 --- /dev/null +++ b/IPL/include/opencv/opencv2/face/mace.hpp @@ -0,0 +1,114 @@ +// This file is part of the OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef __mace_h_onboard__ +#define __mace_h_onboard__ + +#include "opencv2/core.hpp" + + +namespace cv { +namespace face { + +//! @addtogroup face +//! @{ + + +/** +@brief Minimum Average Correlation Energy Filter + useful for authentication with (cancellable) biometrical features. + (does not need many positives to train (10-50), and no negatives at all, also robust to noise/salting) + + see also: @cite Savvides04 + + this implementation is largely based on: https://code.google.com/archive/p/pam-face-authentication (GSOC 2009) + + use it like: + @code + + Ptr mace = face::MACE::create(64); + + vector pos_images = ... + mace->train(pos_images); + + Mat query = ... + bool same = mace->same(query); + + @endcode + + you can also use two-factor authentication, with an additional passphrase: + + @code + String owners_passphrase = "ilikehotdogs"; + Ptr mace = face::MACE::create(64); + mace->salt(owners_passphrase); + vector pos_images = ... + mace->train(pos_images); + + // now, users have to give a valid passphrase, along with the image: + Mat query = ... + cout << "enter passphrase: "; + string pass; + getline(cin, pass); + mace->salt(pass); + bool same = mace->same(query); + @endcode + + save/load your model: + @code + Ptr mace = face::MACE::create(64); + mace->train(pos_images); + mace->save("my_mace.xml"); + + // later: + Ptr reloaded = MACE::load("my_mace.xml"); + reloaded->same(some_image); + @endcode + +*/ + +class CV_EXPORTS_W MACE : public cv::Algorithm +{ +public: + /** + @brief optionally encrypt images with random convolution + @param passphrase a crc64 random seed will get generated from this + */ + CV_WRAP virtual void salt(const cv::String &passphrase) = 0; + + /** + @brief train it on positive features + compute the mace filter: `h = D(-1) * X * (X(+) * D(-1) * X)(-1) * C` + also calculate a minimal threshold for this class, the smallest self-similarity from the train images + @param images a vector with the train images + */ + CV_WRAP virtual void train(cv::InputArrayOfArrays images) = 0; + + /** + @brief correlate query img and threshold to min class value + @param query a Mat with query image + */ + CV_WRAP virtual bool same(cv::InputArray query) const = 0; + + + /** + @brief constructor + @param filename build a new MACE instance from a pre-serialized FileStorage + @param objname (optional) top-level node in the FileStorage + */ + CV_WRAP static cv::Ptr load(const String &filename, const String &objname=String()); + + /** + @brief constructor + @param IMGSIZE images will get resized to this (should be an even number) + */ + CV_WRAP static cv::Ptr create(int IMGSIZE=64); +}; + +//! @} + +}/* namespace face */ +}/* namespace cv */ + +#endif // __mace_h_onboard__ diff --git a/IPL/include/opencv/opencv2/face/predict_collector.hpp b/IPL/include/opencv/opencv2/face/predict_collector.hpp index 92de6c1..dba3c7e 100644 --- a/IPL/include/opencv/opencv2/face/predict_collector.hpp +++ b/IPL/include/opencv/opencv2/face/predict_collector.hpp @@ -44,262 +44,84 @@ the use of this software, even if advised of the possibility of such damage. #ifndef __OPENCV_PREDICT_COLLECTOR_HPP__ #define __OPENCV_PREDICT_COLLECTOR_HPP__ -#include -#include + #include #include -#include "opencv2/core/cvdef.h" -#include "opencv2/core/cvstd.hpp" -#undef emit //fix for qt +#include +#include + +#include "opencv2/core/base.hpp" + namespace cv { namespace face { //! @addtogroup face //! @{ /** @brief Abstract base class for all strategies of prediction result handling */ -class CV_EXPORTS_W PredictCollector { -protected: - double _threshold; - int _size; - int _state; - int _excludeLabel; - double _distanceKoef; - double _minthreshold; +class CV_EXPORTS_W PredictCollector +{ public: - /** @brief creates new predict collector with given threshold */ - PredictCollector(double threshold = DBL_MAX) { - _threshold = threshold; - _excludeLabel = 0; - _distanceKoef = 1; - _minthreshold = -1; - } - CV_WRAP virtual ~PredictCollector() {} + virtual ~PredictCollector() {} - /** @brief called once at start of recognition + /** @brief Interface method called by face recognizer before results processing @param size total size of prediction evaluation that recognizer could perform - @param state user defined send-to-back optional value to allow multi-thread, multi-session or aggregation scenarios - */ - CV_WRAP virtual void init(const int size, const int state = 0); - - /** @brief called by recognizer prior to emit to decide if prediction require emiting - @param label current predicted label - @param dist current predicted distance - @param state back send state parameter of prediction session - @return true if prediction is valid and required for emiting - @note can override given label and distance to another values */ - CV_WRAP virtual bool defaultFilter(int* label, double* dist, const int state); - - /** @brief extension point for filter - called if base filter executed */ - CV_WRAP virtual bool filter(int* label, double* dist, const int state); + virtual void init(size_t size) { CV_UNUSED(size); } - /** @brief called with every recognition result + /** @brief Interface method called by face recognizer for each result @param label current prediction label @param dist current prediction distance (confidence) - @param state user defined send-to-back optional value to allow multi-thread, multi-session or aggregation scenarios - @return true if recognizer should proceed prediction , false - if recognizer should terminate prediction - */ - CV_WRAP virtual bool emit(const int label, const double dist, const int state = 0); //not abstract while Python generation require non-abstract class - - /** @brief outer interface method to be called from recognizer - @param label current prediction label - @param dist current prediction distance (confidence) - @param state user defined send-to-back optional value to allow multi-thread, multi-session or aggregation scenarios - @note wraps filter and emit calls, not tended to be overriden - */ - CV_WRAP virtual bool collect(int label, double dist, const int state = 0); - - /** - @brief get size of prediction - ### Description - Is set by recognizer and is amount of all available predicts - So we can use it to perform statistic collectors before prediction of whole set - */ - CV_WRAP virtual int getSize(); - - /** @brief set size of prediction */ - CV_WRAP virtual void setSize(int size); - - /** - @brief get state of prediction - ### Description - State is a custom value assigned for prediction session, 0 if it's no-state session - */ - CV_WRAP virtual int getState(); - - /** @brief set state of prediction */ - CV_WRAP virtual void setState(int state); - - /** - @brief returns currently excluded label, 0 if no set - ### Description - We require to exclude label if we want to test card in train set against others */ - CV_WRAP virtual int getExcludeLabel(); - - /** @brief set exclude label of prediction */ - CV_WRAP virtual void setExcludeLabel(int excludeLabel); - - /** - @brief returns current distance koeficient (applyed to distance in filter stage) - ### Description - It's required if we want to predict with distinct algorithms in one session - so LBPH, Eigen and Fisher distance are different, but we can provide koef for them to translate to - each other (while their distribuition for same train set is close and started from 0) - Default 1 koef means that distance is not corrected - */ - CV_WRAP virtual double getDistanceKoef(); - - /** @brief set exclude label of prediction */ - CV_WRAP virtual void setDistanceKoef(double distanceKoef); - /** - @brief returns current minimal threshold - ### Description - It's required when we must exclude most closed predictions (for example we - search for close but not same faces - usable for mixed set where doubles exists - in train collection) - */ - CV_WRAP virtual double getMinThreshold(); - - /** @brief set minimal threshold for prediction */ - CV_WRAP virtual void setMinThreshold(double minthreshold); - + virtual bool collect(int label, double dist) = 0; }; -/** @brief default predict collector that trace minimal distance with treshhold checking (that is default behavior for most predict logic) -*/ -class CV_EXPORTS_W MinDistancePredictCollector : public PredictCollector { -private: - int _label; - double _dist; -public: - /** @brief creates new MinDistancePredictCollector with given threshold */ - CV_WRAP MinDistancePredictCollector(double threshold = DBL_MAX) : PredictCollector(threshold) { - _label = -1; - _dist = DBL_MAX; - }; - CV_WRAP bool emit(const int label, const double dist, const int state = 0); - CV_WRAP bool filter(int* label, double* dist, const int state); - /** @brief result label, -1 if not found */ - CV_WRAP int getLabel() const; - /** @brief result distance (confidence) DBL_MAX if not found */ - CV_WRAP double getDist() const; - /** @brief factory method to create cv-pointers to MinDistancePredictCollector */ - CV_WRAP static Ptr create(double threshold = DBL_MAX); -}; +/** @brief Default predict collector -/** -@brief Collects top N most close predictions -@note Prevent doubling of same label - if one label is occured twice - most closed distance value will be set +Trace minimal distance with treshhold checking (that is default behavior for most predict logic) */ -class CV_EXPORTS_W TopNPredictCollector : public PredictCollector { -private: - size_t _size; - Ptr > > _idx; +class CV_EXPORTS_W StandardCollector : public PredictCollector +{ public: - CV_WRAP TopNPredictCollector(size_t size = 5, double threshold = DBL_MAX) : PredictCollector(threshold) { - _size = size; - _idx = Ptr > >(new std::list >); + struct PredictResult + { + int label; + double distance; + PredictResult(int label_ = -1, double distance_ = DBL_MAX) : label(label_), distance(distance_) {} }; - CV_WRAP bool emit(const int label, const double dist, const int state = 0); - CV_WRAP bool filter(int* label, double* dist, const int state); - Ptr > > getResult(); - CV_WRAP std::vector > getResultVector(); // pythonable version - CV_WRAP static Ptr create(size_t size = 5, double threshold = DBL_MAX); -}; - - -/** -@brief Collects all predict results to single vector -@note this collector not analyze double labels in emit, it's raw copy of source prediction result, -remember that filter is still applyed so you can use min/max threshold , distanceKoef and excludeLabel -*/ -class CV_EXPORTS_W VectorPredictCollector : public PredictCollector { -private: - Ptr > > _idx; -public: - CV_WRAP static const int DEFAULT_SIZE = 5; // top 5 by default - CV_WRAP VectorPredictCollector(double threshold = DBL_MAX) : PredictCollector(threshold) { - _idx = Ptr > >(new std::vector >); - } - CV_WRAP bool emit(const int label, const double dist, const int state = 0); - Ptr > > getResult(); - CV_WRAP std::vector > getResultVector(); // pythonable version - CV_WRAP static Ptr create(double threshold = DBL_MAX); -}; - - -/** -@brief Collects all predict results to single vector -@note this collector not analyze double labels in emit, it's raw copy of source prediction result, -remember that filter is still applyed so you can use min/max threshold , distanceKoef and excludeLabel -*/ -class CV_EXPORTS_W MapPredictCollector : public PredictCollector { -private: - Ptr > _idx; -public: - CV_WRAP static const int DEFAULT_SIZE = 5; // top 5 by default - CV_WRAP MapPredictCollector(double threshold = DBL_MAX) : PredictCollector(threshold) { - _idx = Ptr >(new std::map); - } - CV_WRAP bool emit(const int label, const double dist, const int state = 0); - Ptr > getResult(); - CV_WRAP std::vector > getResultVector(); // pythonable version - CV_WRAP static Ptr create(double threshold = DBL_MAX); -}; - -/** -@brief Collects basic statistic information about prediction -@note stat predict collector is usefull for determining valid thresholds -on given trained set, additionally it's required to -evaluate unified koefs between algorithms -*/ -class CV_EXPORTS_W StatPredictCollector : public PredictCollector { -private: - double _min; - double _max; - int _count; - double _sum; -public: - CV_WRAP StatPredictCollector(double threshold = DBL_MAX) : PredictCollector(threshold) { - _min = DBL_MAX; - _max = DBL_MIN; - _count = 0; - _sum = 0; - } - CV_WRAP bool emit(const int label, const double dist, const int state = 0); - CV_WRAP double getMin(); - CV_WRAP double getMax(); - CV_WRAP double getSum(); - CV_WRAP int getCount(); - CV_WRAP static Ptr create(double threshold = DBL_MAX); -}; - -/** -@brief evaluates standard deviation of given prediction session over trained set -@note in combine with StatPredictCollector can provide statistically based metrices -for thresholds -*/ -class CV_EXPORTS_W StdPredictCollector : public PredictCollector { -private: - double _avg; - double _n; - double _s; +protected: + double threshold; + PredictResult minRes; + std::vector data; public: - CV_WRAP StdPredictCollector(double threshold = DBL_MAX, double avg = 0) : PredictCollector(threshold) { - _avg = avg; - _n = 0; - _s = 0; - } - CV_WRAP bool emit(const int label, const double dist, const int state = 0); - CV_WRAP double getResult(); - CV_WRAP static Ptr create(double threshold = DBL_MAX, double avg = 0); + /** @brief Constructor + @param threshold_ set threshold + */ + StandardCollector(double threshold_ = DBL_MAX); + /** @brief overloaded interface method */ + void init(size_t size) CV_OVERRIDE; + /** @brief overloaded interface method */ + bool collect(int label, double dist) CV_OVERRIDE; + /** @brief Returns label with minimal distance */ + CV_WRAP int getMinLabel() const; + /** @brief Returns minimal distance value */ + CV_WRAP double getMinDist() const; + /** @brief Return results as vector + @param sorted If set, results will be sorted by distance + Each values is a pair of label and distance. + */ + CV_WRAP std::vector< std::pair > getResults(bool sorted = false) const; + /** @brief Return results as map + Labels are keys, values are minimal distances + */ + std::map getResultsMap() const; + /** @brief Static constructor + @param threshold set threshold + */ + CV_WRAP static Ptr create(double threshold = DBL_MAX); }; - - //! @} } } -#endif \ No newline at end of file +#endif diff --git a/IPL/include/opencv/opencv2/features2d.hpp b/IPL/include/opencv/opencv2/features2d.hpp index 692d3d9..24f0af5 100644 --- a/IPL/include/opencv/opencv2/features2d.hpp +++ b/IPL/include/opencv/opencv2/features2d.hpp @@ -40,11 +40,15 @@ // //M*/ -#ifndef __OPENCV_FEATURES_2D_HPP__ -#define __OPENCV_FEATURES_2D_HPP__ +#ifndef OPENCV_FEATURES_2D_HPP +#define OPENCV_FEATURES_2D_HPP +#include "opencv2/opencv_modules.hpp" #include "opencv2/core.hpp" + +#ifdef HAVE_OPENCV_FLANN #include "opencv2/flann/miniflann.hpp" +#endif /** @defgroup features2d 2D Features Framework @@ -76,6 +80,10 @@ This section describes approaches based on local 2D features and used to categor - (Python) An example using the features2D framework to perform object categorization can be found at opencv_source_code/samples/python/find_obj.py + @defgroup feature2d_hal Hardware Acceleration Layer + @{ + @defgroup features2d_hal_interface Interface + @} @} */ @@ -117,6 +125,10 @@ class CV_EXPORTS KeyPointsFilter * Remove duplicated keypoints. */ static void removeDuplicated( std::vector& keypoints ); + /* + * Remove duplicated keypoints and sort the remaining keypoints + */ + static void removeDuplicatedSorted( std::vector& keypoints ); /* * Retain the specified number of the best keypoints (according to the response) @@ -129,7 +141,11 @@ class CV_EXPORTS KeyPointsFilter /** @brief Abstract base class for 2D image feature detectors and descriptor extractors */ +#ifdef __EMSCRIPTEN__ +class CV_EXPORTS_W Feature2D : public Algorithm +#else class CV_EXPORTS_W Feature2D : public virtual Algorithm +#endif { public: virtual ~Feature2D(); @@ -153,8 +169,8 @@ class CV_EXPORTS_W Feature2D : public virtual Algorithm @param masks Masks for each input image specifying where to look for keypoints (optional). masks[i] is a mask for images[i]. */ - virtual void detect( InputArrayOfArrays images, - std::vector >& keypoints, + CV_WRAP virtual void detect( InputArrayOfArrays images, + CV_OUT std::vector >& keypoints, InputArrayOfArrays masks=noArray() ); /** @brief Computes the descriptors for a set of keypoints detected in an image (first variant) or image set @@ -182,8 +198,8 @@ class CV_EXPORTS_W Feature2D : public virtual Algorithm descriptors computed for a keypoints[i]. Row j is the keypoints (or keypoints[i]) is the descriptor for keypoint j-th keypoint. */ - virtual void compute( InputArrayOfArrays images, - std::vector >& keypoints, + CV_WRAP virtual void compute( InputArrayOfArrays images, + CV_OUT CV_IN_OUT std::vector >& keypoints, OutputArrayOfArrays descriptors ); /** Detects keypoints and computes the descriptors */ @@ -196,8 +212,21 @@ class CV_EXPORTS_W Feature2D : public virtual Algorithm CV_WRAP virtual int descriptorType() const; CV_WRAP virtual int defaultNorm() const; + CV_WRAP void write( const String& fileName ) const; + + CV_WRAP void read( const String& fileName ); + + virtual void write( FileStorage&) const CV_OVERRIDE; + + // see corresponding cv::Algorithm method + CV_WRAP virtual void read( const FileNode&) CV_OVERRIDE; + //! Return true if detector object is empty - CV_WRAP virtual bool empty() const; + CV_WRAP virtual bool empty() const CV_OVERRIDE; + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; + + // see corresponding cv::Algorithm method + CV_WRAP inline void write(const Ptr& fs, const String& name = String()) const { Algorithm::write(fs, name); } }; /** Feature detectors in OpenCV have wrappers with a common interface that enables you to easily switch @@ -242,6 +271,36 @@ class CV_EXPORTS_W BRISK : public Feature2D @param indexChange index remapping of the bits. */ CV_WRAP static Ptr create(const std::vector &radiusList, const std::vector &numberList, float dMax=5.85f, float dMin=8.2f, const std::vector& indexChange=std::vector()); + + /** @brief The BRISK constructor for a custom pattern, detection threshold and octaves + + @param thresh AGAST detection threshold score. + @param octaves detection octaves. Use 0 to do single scale. + @param radiusList defines the radii (in pixels) where the samples around a keypoint are taken (for + keypoint scale 1). + @param numberList defines the number of sampling points on the sampling circle. Must be the same + size as radiusList.. + @param dMax threshold for the short pairings used for descriptor formation (in pixels for keypoint + scale 1). + @param dMin threshold for the long pairings used for orientation determination (in pixels for + keypoint scale 1). + @param indexChange index remapping of the bits. */ + CV_WRAP static Ptr create(int thresh, int octaves, const std::vector &radiusList, + const std::vector &numberList, float dMax=5.85f, float dMin=8.2f, + const std::vector& indexChange=std::vector()); + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; + + /** @brief Set detection threshold. + @param threshold AGAST detection threshold score. + */ + CV_WRAP virtual void setThreshold(int threshold) { CV_UNUSED(threshold); return; } + CV_WRAP virtual int getThreshold() const { return -1; } + + /** @brief Set detection octaves. + @param octaves detection octaves. Use 0 to do single scale. + */ + CV_WRAP virtual void setOctaves(int octaves) { CV_UNUSED(octaves); return; } + CV_WRAP virtual int getOctaves() const { return -1; } }; /** @brief Class implementing the ORB (*oriented BRIEF*) keypoint detector and descriptor extractor @@ -254,7 +313,8 @@ k-tuples) are rotated according to the measured orientation). class CV_EXPORTS_W ORB : public Feature2D { public: - enum { kBytes = 32, HARRIS_SCORE=0, FAST_SCORE=1 }; + enum ScoreType { HARRIS_SCORE=0, FAST_SCORE=1 }; + static const int kBytes = 32; /** @brief The ORB constructor @@ -265,10 +325,11 @@ class CV_EXPORTS_W ORB : public Feature2D will mean that to cover certain scale range you will need more pyramid levels and so the speed will suffer. @param nlevels The number of pyramid levels. The smallest level will have linear size equal to - input_image_linear_size/pow(scaleFactor, nlevels). + input_image_linear_size/pow(scaleFactor, nlevels - firstLevel). @param edgeThreshold This is size of the border where the features are not detected. It should roughly match the patchSize parameter. - @param firstLevel It should be 0 in the current implementation. + @param firstLevel The level of pyramid to put source image to. Previous layers are filled + with upscaled source image. @param WTA_K The number of points that produce each element of the oriented BRIEF descriptor. The default value 2 means the BRIEF where we take a random point pair and compare their brightnesses, so we get 0/1 response. Other possible values are 3 and 4. For example, 3 means that we take 3 @@ -284,10 +345,10 @@ class CV_EXPORTS_W ORB : public Feature2D but it is a little faster to compute. @param patchSize size of the patch used by the oriented BRIEF descriptor. Of course, on smaller pyramid layers the perceived image area covered by a feature will be larger. - @param fastThreshold + @param fastThreshold the fast threshold */ CV_WRAP static Ptr create(int nfeatures=500, float scaleFactor=1.2f, int nlevels=8, int edgeThreshold=31, - int firstLevel=0, int WTA_K=2, int scoreType=ORB::HARRIS_SCORE, int patchSize=31, int fastThreshold=20); + int firstLevel=0, int WTA_K=2, ORB::ScoreType scoreType=ORB::HARRIS_SCORE, int patchSize=31, int fastThreshold=20); CV_WRAP virtual void setMaxFeatures(int maxFeatures) = 0; CV_WRAP virtual int getMaxFeatures() const = 0; @@ -307,14 +368,15 @@ class CV_EXPORTS_W ORB : public Feature2D CV_WRAP virtual void setWTA_K(int wta_k) = 0; CV_WRAP virtual int getWTA_K() const = 0; - CV_WRAP virtual void setScoreType(int scoreType) = 0; - CV_WRAP virtual int getScoreType() const = 0; + CV_WRAP virtual void setScoreType(ORB::ScoreType scoreType) = 0; + CV_WRAP virtual ORB::ScoreType getScoreType() const = 0; CV_WRAP virtual void setPatchSize(int patchSize) = 0; CV_WRAP virtual int getPatchSize() const = 0; CV_WRAP virtual void setFastThreshold(int fastThreshold) = 0; CV_WRAP virtual int getFastThreshold() const = 0; + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; }; /** @brief Maximally stable extremal region extractor @@ -336,12 +398,12 @@ code which is distributed under GPL. class CV_EXPORTS_W MSER : public Feature2D { public: - /** @brief Full consturctor for %MSER detector + /** @brief Full constructor for %MSER detector @param _delta it compares \f$(size_{i}-size_{i-delta})/size_{i-delta}\f$ @param _min_area prune the area which smaller than minArea @param _max_area prune the area which bigger than maxArea - @param _max_variation prune the area have simliar size to its children + @param _max_variation prune the area have similar size to its children @param _min_diversity for color image, trace back to cut off mser with diversity less than min_diversity @param _max_evolution for color image, the evolution steps @param _area_threshold for color image, the area threshold to cause re-initialize @@ -355,13 +417,13 @@ class CV_EXPORTS_W MSER : public Feature2D /** @brief Detect %MSER regions - @param image input image (8UC1, 8UC3 or 8UC4) + @param image input image (8UC1, 8UC3 or 8UC4, must be greater or equal than 3x3) @param msers resulting list of point sets @param bboxes resulting bounding boxes */ CV_WRAP virtual void detectRegions( InputArray image, CV_OUT std::vector >& msers, - std::vector& bboxes ) = 0; + CV_OUT std::vector& bboxes ) = 0; CV_WRAP virtual void setDelta(int delta) = 0; CV_WRAP virtual int getDelta() const = 0; @@ -374,6 +436,42 @@ class CV_EXPORTS_W MSER : public Feature2D CV_WRAP virtual void setPass2Only(bool f) = 0; CV_WRAP virtual bool getPass2Only() const = 0; + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; +}; + +//! @} features2d_main + +//! @addtogroup features2d_main +//! @{ + +/** @brief Wrapping class for feature detection using the FAST method. : + */ +class CV_EXPORTS_W FastFeatureDetector : public Feature2D +{ +public: + enum DetectorType + { + TYPE_5_8 = 0, TYPE_7_12 = 1, TYPE_9_16 = 2 + }; + enum + { + THRESHOLD = 10000, NONMAX_SUPPRESSION=10001, FAST_N=10002 + }; + + + CV_WRAP static Ptr create( int threshold=10, + bool nonmaxSuppression=true, + FastFeatureDetector::DetectorType type=FastFeatureDetector::TYPE_9_16 ); + + CV_WRAP virtual void setThreshold(int threshold) = 0; + CV_WRAP virtual int getThreshold() const = 0; + + CV_WRAP virtual void setNonmaxSuppression(bool f) = 0; + CV_WRAP virtual bool getNonmaxSuppression() const = 0; + + CV_WRAP virtual void setType(FastFeatureDetector::DetectorType type) = 0; + CV_WRAP virtual FastFeatureDetector::DetectorType getType() const = 0; + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; }; /** @overload */ @@ -394,32 +492,36 @@ FastFeatureDetector::TYPE_5_8 Detects corners using the FAST algorithm by @cite Rosten06 . -@note In Python API, types are given as cv2.FAST_FEATURE_DETECTOR_TYPE_5_8, -cv2.FAST_FEATURE_DETECTOR_TYPE_7_12 and cv2.FAST_FEATURE_DETECTOR_TYPE_9_16. For corner -detection, use cv2.FAST.detect() method. +@note In Python API, types are given as cv.FAST_FEATURE_DETECTOR_TYPE_5_8, +cv.FAST_FEATURE_DETECTOR_TYPE_7_12 and cv.FAST_FEATURE_DETECTOR_TYPE_9_16. For corner +detection, use cv.FAST.detect() method. */ CV_EXPORTS void FAST( InputArray image, CV_OUT std::vector& keypoints, - int threshold, bool nonmaxSuppression, int type ); + int threshold, bool nonmaxSuppression, FastFeatureDetector::DetectorType type ); //! @} features2d_main //! @addtogroup features2d_main //! @{ -/** @brief Wrapping class for feature detection using the FAST method. : +/** @brief Wrapping class for feature detection using the AGAST method. : */ -class CV_EXPORTS_W FastFeatureDetector : public Feature2D +class CV_EXPORTS_W AgastFeatureDetector : public Feature2D { public: + enum DetectorType + { + AGAST_5_8 = 0, AGAST_7_12d = 1, AGAST_7_12s = 2, OAST_9_16 = 3, + }; + enum { - TYPE_5_8 = 0, TYPE_7_12 = 1, TYPE_9_16 = 2, - THRESHOLD = 10000, NONMAX_SUPPRESSION=10001, FAST_N=10002, + THRESHOLD = 10000, NONMAX_SUPPRESSION = 10001, }; - CV_WRAP static Ptr create( int threshold=10, - bool nonmaxSuppression=true, - int type=FastFeatureDetector::TYPE_9_16 ); + CV_WRAP static Ptr create( int threshold=10, + bool nonmaxSuppression=true, + AgastFeatureDetector::DetectorType type = AgastFeatureDetector::OAST_9_16); CV_WRAP virtual void setThreshold(int threshold) = 0; CV_WRAP virtual int getThreshold() const = 0; @@ -427,8 +529,9 @@ class CV_EXPORTS_W FastFeatureDetector : public Feature2D CV_WRAP virtual void setNonmaxSuppression(bool f) = 0; CV_WRAP virtual bool getNonmaxSuppression() const = 0; - CV_WRAP virtual void setType(int type) = 0; - CV_WRAP virtual int getType() const = 0; + CV_WRAP virtual void setType(AgastFeatureDetector::DetectorType type) = 0; + CV_WRAP virtual AgastFeatureDetector::DetectorType getType() const = 0; + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; }; /** @overload */ @@ -454,36 +557,7 @@ Detects corners using the AGAST algorithm by @cite mair2010_agast . */ CV_EXPORTS void AGAST( InputArray image, CV_OUT std::vector& keypoints, - int threshold, bool nonmaxSuppression, int type ); -//! @} features2d_main - -//! @addtogroup features2d_main -//! @{ - -/** @brief Wrapping class for feature detection using the AGAST method. : - */ -class CV_EXPORTS_W AgastFeatureDetector : public Feature2D -{ -public: - enum - { - AGAST_5_8 = 0, AGAST_7_12d = 1, AGAST_7_12s = 2, OAST_9_16 = 3, - THRESHOLD = 10000, NONMAX_SUPPRESSION = 10001, - }; - - CV_WRAP static Ptr create( int threshold=10, - bool nonmaxSuppression=true, - int type=AgastFeatureDetector::OAST_9_16 ); - - CV_WRAP virtual void setThreshold(int threshold) = 0; - CV_WRAP virtual int getThreshold() const = 0; - - CV_WRAP virtual void setNonmaxSuppression(bool f) = 0; - CV_WRAP virtual bool getNonmaxSuppression() const = 0; - - CV_WRAP virtual void setType(int type) = 0; - CV_WRAP virtual int getType() const = 0; -}; + int threshold, bool nonmaxSuppression, AgastFeatureDetector::DetectorType type ); /** @brief Wrapping class for feature detection using the goodFeaturesToTrack function. : */ @@ -492,6 +566,8 @@ class CV_EXPORTS_W GFTTDetector : public Feature2D public: CV_WRAP static Ptr create( int maxCorners=1000, double qualityLevel=0.01, double minDistance=1, int blockSize=3, bool useHarrisDetector=false, double k=0.04 ); + CV_WRAP static Ptr create( int maxCorners, double qualityLevel, double minDistance, + int blockSize, int gradiantSize, bool useHarrisDetector=false, double k=0.04 ); CV_WRAP virtual void setMaxFeatures(int maxFeatures) = 0; CV_WRAP virtual int getMaxFeatures() const = 0; @@ -509,6 +585,7 @@ class CV_EXPORTS_W GFTTDetector : public Feature2D CV_WRAP virtual void setK(double k) = 0; CV_WRAP virtual double getK() const = 0; + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; }; /** @brief Class for extracting blobs from an image. : @@ -575,6 +652,7 @@ class CV_EXPORTS_W SimpleBlobDetector : public Feature2D CV_WRAP static Ptr create(const SimpleBlobDetector::Params ¶meters = SimpleBlobDetector::Params()); + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; }; //! @} features2d_main @@ -591,7 +669,7 @@ F. Alcantarilla, Adrien Bartoli and Andrew J. Davison. In European Conference on class CV_EXPORTS_W KAZE : public Feature2D { public: - enum + enum DiffusivityType { DIFF_PM_G1 = 0, DIFF_PM_G2 = 1, @@ -612,7 +690,7 @@ class CV_EXPORTS_W KAZE : public Feature2D CV_WRAP static Ptr create(bool extended=false, bool upright=false, float threshold = 0.001f, int nOctaves = 4, int nOctaveLayers = 4, - int diffusivity = KAZE::DIFF_PM_G2); + KAZE::DiffusivityType diffusivity = KAZE::DIFF_PM_G2); CV_WRAP virtual void setExtended(bool extended) = 0; CV_WRAP virtual bool getExtended() const = 0; @@ -629,22 +707,32 @@ class CV_EXPORTS_W KAZE : public Feature2D CV_WRAP virtual void setNOctaveLayers(int octaveLayers) = 0; CV_WRAP virtual int getNOctaveLayers() const = 0; - CV_WRAP virtual void setDiffusivity(int diff) = 0; - CV_WRAP virtual int getDiffusivity() const = 0; + CV_WRAP virtual void setDiffusivity(KAZE::DiffusivityType diff) = 0; + CV_WRAP virtual KAZE::DiffusivityType getDiffusivity() const = 0; + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; }; -/** @brief Class implementing the AKAZE keypoint detector and descriptor extractor, described in @cite ANB13 . : +/** @brief Class implementing the AKAZE keypoint detector and descriptor extractor, described in @cite ANB13. -@note AKAZE descriptors can only be used with KAZE or AKAZE keypoints. Try to avoid using *extract* -and *detect* instead of *operator()* due to performance reasons. .. [ANB13] Fast Explicit Diffusion -for Accelerated Features in Nonlinear Scale Spaces. Pablo F. Alcantarilla, Jesús Nuevo and Adrien -Bartoli. In British Machine Vision Conference (BMVC), Bristol, UK, September 2013. - */ +@details AKAZE descriptors can only be used with KAZE or AKAZE keypoints. This class is thread-safe. + +@note When you need descriptors use Feature2D::detectAndCompute, which +provides better performance. When using Feature2D::detect followed by +Feature2D::compute scale space pyramid is computed twice. + +@note AKAZE implements T-API. When image is passed as UMat some parts of the algorithm +will use OpenCL. + +@note [ANB13] Fast Explicit Diffusion for Accelerated Features in Nonlinear +Scale Spaces. Pablo F. Alcantarilla, Jesús Nuevo and Adrien Bartoli. In +British Machine Vision Conference (BMVC), Bristol, UK, September 2013. + +*/ class CV_EXPORTS_W AKAZE : public Feature2D { public: // AKAZE descriptor type - enum + enum DescriptorType { DESCRIPTOR_KAZE_UPRIGHT = 2, ///< Upright descriptors, not invariant to rotation DESCRIPTOR_KAZE = 3, @@ -664,13 +752,13 @@ class CV_EXPORTS_W AKAZE : public Feature2D @param diffusivity Diffusivity type. DIFF_PM_G1, DIFF_PM_G2, DIFF_WEICKERT or DIFF_CHARBONNIER */ - CV_WRAP static Ptr create(int descriptor_type=AKAZE::DESCRIPTOR_MLDB, + CV_WRAP static Ptr create(AKAZE::DescriptorType descriptor_type = AKAZE::DESCRIPTOR_MLDB, int descriptor_size = 0, int descriptor_channels = 3, float threshold = 0.001f, int nOctaves = 4, - int nOctaveLayers = 4, int diffusivity = KAZE::DIFF_PM_G2); + int nOctaveLayers = 4, KAZE::DiffusivityType diffusivity = KAZE::DIFF_PM_G2); - CV_WRAP virtual void setDescriptorType(int dtype) = 0; - CV_WRAP virtual int getDescriptorType() const = 0; + CV_WRAP virtual void setDescriptorType(AKAZE::DescriptorType dtype) = 0; + CV_WRAP virtual AKAZE::DescriptorType getDescriptorType() const = 0; CV_WRAP virtual void setDescriptorSize(int dsize) = 0; CV_WRAP virtual int getDescriptorSize() const = 0; @@ -687,8 +775,9 @@ class CV_EXPORTS_W AKAZE : public Feature2D CV_WRAP virtual void setNOctaveLayers(int octaveLayers) = 0; CV_WRAP virtual int getNOctaveLayers() const = 0; - CV_WRAP virtual void setDiffusivity(int diff) = 0; - CV_WRAP virtual int getDiffusivity() const = 0; + CV_WRAP virtual void setDiffusivity(KAZE::DiffusivityType diff) = 0; + CV_WRAP virtual KAZE::DiffusivityType getDiffusivity() const = 0; + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; }; //! @} features2d_main @@ -714,7 +803,7 @@ template<> struct Accumulator { typedef float Type; }; template struct CV_EXPORTS SL2 { - enum { normType = NORM_L2SQR }; + static const NormTypes normType = NORM_L2SQR; typedef T ValueType; typedef typename Accumulator::Type ResultType; @@ -728,9 +817,9 @@ struct CV_EXPORTS SL2 * Euclidean distance functor */ template -struct CV_EXPORTS L2 +struct L2 { - enum { normType = NORM_L2 }; + static const NormTypes normType = NORM_L2; typedef T ValueType; typedef typename Accumulator::Type ResultType; @@ -744,9 +833,9 @@ struct CV_EXPORTS L2 * Manhattan distance (city block distance) functor */ template -struct CV_EXPORTS L1 +struct L1 { - enum { normType = NORM_L1 }; + static const NormTypes normType = NORM_L1; typedef T ValueType; typedef typename Accumulator::Type ResultType; @@ -771,6 +860,16 @@ an image set. class CV_EXPORTS_W DescriptorMatcher : public Algorithm { public: + enum MatcherType + { + FLANNBASED = 1, + BRUTEFORCE = 2, + BRUTEFORCE_L1 = 3, + BRUTEFORCE_HAMMING = 4, + BRUTEFORCE_HAMMINGLUT = 5, + BRUTEFORCE_SL2 = 6 + }; + virtual ~DescriptorMatcher(); /** @brief Adds descriptors to train a CPU(trainDescCollectionis) or GPU(utrainDescCollectionis) descriptor @@ -789,11 +888,11 @@ class CV_EXPORTS_W DescriptorMatcher : public Algorithm /** @brief Clears the train descriptor collections. */ - CV_WRAP virtual void clear(); + CV_WRAP virtual void clear() CV_OVERRIDE; /** @brief Returns true if there are no train descriptors in the both collections. */ - CV_WRAP virtual bool empty() const; + CV_WRAP virtual bool empty() const CV_OVERRIDE; /** @brief Returns true if the descriptor matcher supports masking permissible matches. */ @@ -868,8 +967,8 @@ class CV_EXPORTS_W DescriptorMatcher : public Algorithm query descriptor and the training descriptor is equal or smaller than maxDistance. Found matches are returned in the distance increasing order. */ - void radiusMatch( InputArray queryDescriptors, InputArray trainDescriptors, - std::vector >& matches, float maxDistance, + CV_WRAP void radiusMatch( InputArray queryDescriptors, InputArray trainDescriptors, + CV_OUT std::vector >& matches, float maxDistance, InputArray mask=noArray(), bool compactResult=false ) const; /** @overload @@ -906,13 +1005,26 @@ class CV_EXPORTS_W DescriptorMatcher : public Algorithm false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, the matches vector does not contain matches for fully masked-out query descriptors. */ - void radiusMatch( InputArray queryDescriptors, std::vector >& matches, float maxDistance, + CV_WRAP void radiusMatch( InputArray queryDescriptors, CV_OUT std::vector >& matches, float maxDistance, InputArrayOfArrays masks=noArray(), bool compactResult=false ); + + CV_WRAP void write( const String& fileName ) const + { + FileStorage fs(fileName, FileStorage::WRITE); + write(fs); + } + + CV_WRAP void read( const String& fileName ) + { + FileStorage fs(fileName, FileStorage::READ); + read(fs.root()); + } // Reads matcher object from a file node - virtual void read( const FileNode& ); + // see corresponding cv::Algorithm method + CV_WRAP virtual void read( const FileNode& ) CV_OVERRIDE; // Writes matcher object to a file storage - virtual void write( FileStorage& ) const; + virtual void write( FileStorage& ) const CV_OVERRIDE; /** @brief Clones the matcher. @@ -920,7 +1032,7 @@ class CV_EXPORTS_W DescriptorMatcher : public Algorithm that is, copies both parameters and train data. If emptyTrainData is true, the method creates an object copy with the current parameters but with empty train data. */ - virtual Ptr clone( bool emptyTrainData=false ) const = 0; + CV_WRAP virtual Ptr clone( bool emptyTrainData=false ) const = 0; /** @brief Creates a descriptor matcher of a given type with the default parameters (using default constructor). @@ -934,6 +1046,13 @@ class CV_EXPORTS_W DescriptorMatcher : public Algorithm - `FlannBased` */ CV_WRAP static Ptr create( const String& descriptorMatcherType ); + + CV_WRAP static Ptr create( const DescriptorMatcher::MatcherType& matcherType ); + + + // see corresponding cv::Algorithm method + CV_WRAP inline void write(const Ptr& fs, const String& name = String()) const { Algorithm::write(fs, name); } + protected: /** * Class to work with descriptors from several images as with one merged matrix. @@ -990,8 +1109,17 @@ sets. class CV_EXPORTS_W BFMatcher : public DescriptorMatcher { public: - /** @brief Brute-force matcher constructor. + /** @brief Brute-force matcher constructor (obsolete). Please use BFMatcher.create() + * + * + */ + CV_WRAP BFMatcher( int normType=NORM_L2, bool crossCheck=false ); + + virtual ~BFMatcher() {} + + virtual bool isMaskSupported() const CV_OVERRIDE { return true; } + /** @brief Brute-force matcher create method. @param normType One of NORM_L1, NORM_L2, NORM_HAMMING, NORM_HAMMING2. L1 and L2 norms are preferable choices for SIFT and SURF descriptors, NORM_HAMMING should be used with ORB, BRISK and BRIEF, NORM_HAMMING2 should be used with ORB when WTA_K==3 or 4 (see ORB::ORB constructor @@ -1003,26 +1131,24 @@ class CV_EXPORTS_W BFMatcher : public DescriptorMatcher pairs. Such technique usually produces best results with minimal number of outliers when there are enough matches. This is alternative to the ratio test, used by D. Lowe in SIFT paper. */ - CV_WRAP BFMatcher( int normType=NORM_L2, bool crossCheck=false ); - virtual ~BFMatcher() {} + CV_WRAP static Ptr create( int normType=NORM_L2, bool crossCheck=false ) ; - virtual bool isMaskSupported() const { return true; } - - virtual Ptr clone( bool emptyTrainData=false ) const; + virtual Ptr clone( bool emptyTrainData=false ) const CV_OVERRIDE; protected: virtual void knnMatchImpl( InputArray queryDescriptors, std::vector >& matches, int k, - InputArrayOfArrays masks=noArray(), bool compactResult=false ); + InputArrayOfArrays masks=noArray(), bool compactResult=false ) CV_OVERRIDE; virtual void radiusMatchImpl( InputArray queryDescriptors, std::vector >& matches, float maxDistance, - InputArrayOfArrays masks=noArray(), bool compactResult=false ); + InputArrayOfArrays masks=noArray(), bool compactResult=false ) CV_OVERRIDE; int normType; bool crossCheck; }; +#if defined(HAVE_OPENCV_FLANN) || defined(CV_DOXYGEN) /** @brief Flann-based descriptor matcher. -This matcher trains flann::Index_ on a train descriptor collection and calls its nearest search +This matcher trains cv::flann::Index on a train descriptor collection and calls its nearest search methods to find the best matches. So, this matcher may be faster when matching a large train collection than the brute force matcher. FlannBasedMatcher does not support masking permissible matches of descriptor sets because flann::Index does not support this. : @@ -1033,27 +1159,29 @@ class CV_EXPORTS_W FlannBasedMatcher : public DescriptorMatcher CV_WRAP FlannBasedMatcher( const Ptr& indexParams=makePtr(), const Ptr& searchParams=makePtr() ); - virtual void add( InputArrayOfArrays descriptors ); - virtual void clear(); + virtual void add( InputArrayOfArrays descriptors ) CV_OVERRIDE; + virtual void clear() CV_OVERRIDE; // Reads matcher object from a file node - virtual void read( const FileNode& ); + virtual void read( const FileNode& ) CV_OVERRIDE; // Writes matcher object to a file storage - virtual void write( FileStorage& ) const; + virtual void write( FileStorage& ) const CV_OVERRIDE; + + virtual void train() CV_OVERRIDE; + virtual bool isMaskSupported() const CV_OVERRIDE; - virtual void train(); - virtual bool isMaskSupported() const; + CV_WRAP static Ptr create(); - virtual Ptr clone( bool emptyTrainData=false ) const; + virtual Ptr clone( bool emptyTrainData=false ) const CV_OVERRIDE; protected: static void convertToDMatches( const DescriptorCollection& descriptors, const Mat& indices, const Mat& distances, std::vector >& matches ); virtual void knnMatchImpl( InputArray queryDescriptors, std::vector >& matches, int k, - InputArrayOfArrays masks=noArray(), bool compactResult=false ); + InputArrayOfArrays masks=noArray(), bool compactResult=false ) CV_OVERRIDE; virtual void radiusMatchImpl( InputArray queryDescriptors, std::vector >& matches, float maxDistance, - InputArrayOfArrays masks=noArray(), bool compactResult=false ); + InputArrayOfArrays masks=noArray(), bool compactResult=false ) CV_OVERRIDE; Ptr indexParams; Ptr searchParams; @@ -1063,6 +1191,8 @@ class CV_EXPORTS_W FlannBasedMatcher : public DescriptorMatcher int addedDescCount; }; +#endif + //! @} features2d_match /****************************************************************************************\ @@ -1072,20 +1202,20 @@ class CV_EXPORTS_W FlannBasedMatcher : public DescriptorMatcher //! @addtogroup features2d_draw //! @{ -struct CV_EXPORTS DrawMatchesFlags +enum struct DrawMatchesFlags { - enum{ DEFAULT = 0, //!< Output image matrix will be created (Mat::create), - //!< i.e. existing memory of output image may be reused. - //!< Two source image, matches and single keypoints will be drawn. - //!< For each keypoint only the center point will be drawn (without - //!< the circle around keypoint with keypoint size and orientation). - DRAW_OVER_OUTIMG = 1, //!< Output image matrix will not be created (Mat::create). - //!< Matches will be drawn on existing content of output image. - NOT_DRAW_SINGLE_POINTS = 2, //!< Single keypoints will not be drawn. - DRAW_RICH_KEYPOINTS = 4 //!< For each keypoint the circle around keypoint with keypoint size and - //!< orientation will be drawn. - }; + DEFAULT = 0, //!< Output image matrix will be created (Mat::create), + //!< i.e. existing memory of output image may be reused. + //!< Two source image, matches and single keypoints will be drawn. + //!< For each keypoint only the center point will be drawn (without + //!< the circle around keypoint with keypoint size and orientation). + DRAW_OVER_OUTIMG = 1, //!< Output image matrix will not be created (Mat::create). + //!< Matches will be drawn on existing content of output image. + NOT_DRAW_SINGLE_POINTS = 2, //!< Single keypoints will not be drawn. + DRAW_RICH_KEYPOINTS = 4 //!< For each keypoint the circle around keypoint with keypoint size and + //!< orientation will be drawn. }; +CV_ENUM_FLAGS(DrawMatchesFlags) /** @brief Draws keypoints. @@ -1098,12 +1228,12 @@ output image. See possible flags bit values below. DrawMatchesFlags. See details above in drawMatches . @note -For Python API, flags are modified as cv2.DRAW_MATCHES_FLAGS_DEFAULT, -cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS, cv2.DRAW_MATCHES_FLAGS_DRAW_OVER_OUTIMG, -cv2.DRAW_MATCHES_FLAGS_NOT_DRAW_SINGLE_POINTS +For Python API, flags are modified as cv.DRAW_MATCHES_FLAGS_DEFAULT, +cv.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS, cv.DRAW_MATCHES_FLAGS_DRAW_OVER_OUTIMG, +cv.DRAW_MATCHES_FLAGS_NOT_DRAW_SINGLE_POINTS */ CV_EXPORTS_W void drawKeypoints( InputArray image, const std::vector& keypoints, InputOutputArray outImage, - const Scalar& color=Scalar::all(-1), int flags=DrawMatchesFlags::DEFAULT ); + const Scalar& color=Scalar::all(-1), DrawMatchesFlags flags=DrawMatchesFlags::DEFAULT ); /** @brief Draws the found matches of keypoints from two images. @@ -1131,14 +1261,14 @@ CV_EXPORTS_W void drawMatches( InputArray img1, const std::vector& key InputArray img2, const std::vector& keypoints2, const std::vector& matches1to2, InputOutputArray outImg, const Scalar& matchColor=Scalar::all(-1), const Scalar& singlePointColor=Scalar::all(-1), - const std::vector& matchesMask=std::vector(), int flags=DrawMatchesFlags::DEFAULT ); + const std::vector& matchesMask=std::vector(), DrawMatchesFlags flags=DrawMatchesFlags::DEFAULT ); /** @overload */ CV_EXPORTS_AS(drawMatchesKnn) void drawMatches( InputArray img1, const std::vector& keypoints1, InputArray img2, const std::vector& keypoints2, const std::vector >& matches1to2, InputOutputArray outImg, const Scalar& matchColor=Scalar::all(-1), const Scalar& singlePointColor=Scalar::all(-1), - const std::vector >& matchesMask=std::vector >(), int flags=DrawMatchesFlags::DEFAULT ); + const std::vector >& matchesMask=std::vector >(), DrawMatchesFlags flags=DrawMatchesFlags::DEFAULT ); //! @} features2d_draw @@ -1228,8 +1358,8 @@ class CV_EXPORTS_W BOWKMeansTrainer : public BOWTrainer virtual ~BOWKMeansTrainer(); // Returns trained vocabulary (i.e. cluster centers). - CV_WRAP virtual Mat cluster() const; - CV_WRAP virtual Mat cluster( const Mat& descriptors ) const; + CV_WRAP virtual Mat cluster() const CV_OVERRIDE; + CV_WRAP virtual Mat cluster( const Mat& descriptors ) const CV_OVERRIDE; protected: diff --git a/IPL/include/opencv/opencv2/features2d/hal/interface.h b/IPL/include/opencv/opencv2/features2d/hal/interface.h new file mode 100644 index 0000000..bc3b084 --- /dev/null +++ b/IPL/include/opencv/opencv2/features2d/hal/interface.h @@ -0,0 +1,33 @@ +#ifndef OPENCV_FEATURE2D_HAL_INTERFACE_H +#define OPENCV_FEATURE2D_HAL_INTERFACE_H + +#include "opencv2/core/cvdef.h" +//! @addtogroup features2d_hal_interface +//! @{ + +//! @name Fast feature detector types +//! @sa cv::FastFeatureDetector +//! @{ +#define CV_HAL_TYPE_5_8 0 +#define CV_HAL_TYPE_7_12 1 +#define CV_HAL_TYPE_9_16 2 +//! @} + +//! @name Key point +//! @sa cv::KeyPoint +//! @{ +struct CV_EXPORTS cvhalKeyPoint +{ + float x; + float y; + float size; + float angle; + float response; + int octave; + int class_id; +}; +//! @} + +//! @} + +#endif diff --git a/IPL/include/opencv/opencv2/flann.hpp b/IPL/include/opencv/opencv2/flann.hpp index 4f92d57..887759e 100644 --- a/IPL/include/opencv/opencv2/flann.hpp +++ b/IPL/include/opencv/opencv2/flann.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef _OPENCV_FLANN_HPP_ -#define _OPENCV_FLANN_HPP_ +#ifndef OPENCV_FLANN_HPP +#define OPENCV_FLANN_HPP #include "opencv2/core.hpp" #include "opencv2/flann/miniflann.hpp" @@ -59,7 +59,7 @@ can be found in @cite Muja2009 . namespace cvflann { CV_EXPORTS flann_distance_t flann_distance_type(); - FLANN_DEPRECATED CV_EXPORTS void set_distance_type(flann_distance_t distance_type, int order); + CV_DEPRECATED CV_EXPORTS void set_distance_type(flann_distance_t distance_type, int order); } @@ -103,6 +103,58 @@ using ::cvflann::KL_Divergence; /** @brief The FLANN nearest neighbor index class. This class is templated with the type of elements for which the index is built. + +`Distance` functor specifies the metric to be used to calculate the distance between two points. +There are several `Distance` functors that are readily available: + +cv::cvflann::L2_Simple - Squared Euclidean distance functor. +This is the simpler, unrolled version. This is preferable for very low dimensionality data (eg 3D points) + +cv::flann::L2 - Squared Euclidean distance functor, optimized version. + +cv::flann::L1 - Manhattan distance functor, optimized version. + +cv::flann::MinkowskiDistance - The Minkowsky distance functor. +This is highly optimised with loop unrolling. +The computation of squared root at the end is omitted for efficiency. + +cv::flann::MaxDistance - The max distance functor. It computes the +maximum distance between two vectors. This distance is not a valid kdtree distance, it's not +dimensionwise additive. + +cv::flann::HammingLUT - %Hamming distance functor. It counts the bit +differences between two strings using a lookup table implementation. + +cv::flann::Hamming - %Hamming distance functor. Population count is +performed using library calls, if available. Lookup table implementation is used as a fallback. + +cv::flann::Hamming2 - %Hamming distance functor. Population count is +implemented in 12 arithmetic operations (one of which is multiplication). + +cv::flann::HistIntersectionDistance - The histogram +intersection distance functor. + +cv::flann::HellingerDistance - The Hellinger distance functor. + +cv::flann::ChiSquareDistance - The chi-square distance functor. + +cv::flann::KL_Divergence - The Kullback-Leibler divergence functor. + +Although the provided implementations cover a vast range of cases, it is also possible to use +a custom implementation. The distance functor is a class whose `operator()` computes the distance +between two features. If the distance is also a kd-tree compatible distance, it should also provide an +`accum_dist()` method that computes the distance between individual feature dimensions. + +In addition to `operator()` and `accum_dist()`, a distance functor should also define the +`ElementType` and the `ResultType` as the types of the elements it operates on and the type of the +result it computes. If a distance functor can be used as a kd-tree distance (meaning that the full +distance between a pair of features can be accumulated from the partial distances between the +individual dimensions) a typedef `is_kdtree_distance` should be present inside the distance functor. +If the distance is not a kd-tree distance, but it's a distance in a vector space (the individual +dimensions of the elements it operates on can be accessed independently) a typedef +`is_vector_space_distance` should be defined inside the functor. If neither typedef is defined, the +distance is assumed to be a metric distance and will only be used with indexes operating on +generic metric distances. */ template class GenericIndex @@ -217,6 +269,17 @@ class GenericIndex std::vector& dists, int knn, const ::cvflann::SearchParams& params); void knnSearch(const Mat& queries, Mat& indices, Mat& dists, int knn, const ::cvflann::SearchParams& params); + /** @brief Performs a radius nearest neighbor search for a given query point using the index. + + @param query The query point. + @param indices Vector that will contain the indices of the nearest neighbors found. + @param dists Vector that will contain the distances to the nearest neighbors found. It has the same + number of elements as indices. + @param radius The search radius. + @param params SearchParams + + This function returns the number of nearest neighbors found. + */ int radiusSearch(const std::vector& query, std::vector& indices, std::vector& dists, DistanceType radius, const ::cvflann::SearchParams& params); int radiusSearch(const Mat& query, Mat& indices, Mat& dists, @@ -226,14 +289,15 @@ class GenericIndex int veclen() const { return nnIndex->veclen(); } - int size() const { return nnIndex->size(); } + int size() const { return (int)nnIndex->size(); } ::cvflann::IndexParams getParameters() { return nnIndex->getParameters(); } - FLANN_DEPRECATED const ::cvflann::IndexParams* getIndexParameters() { return nnIndex->getIndexParameters(); } + CV_DEPRECATED const ::cvflann::IndexParams* getIndexParameters() { return nnIndex->getIndexParameters(); } private: ::cvflann::Index* nnIndex; + Mat _dataset; }; //! @cond IGNORED @@ -249,10 +313,11 @@ class GenericIndex template GenericIndex::GenericIndex(const Mat& dataset, const ::cvflann::IndexParams& params, Distance distance) +: _dataset(dataset) { CV_Assert(dataset.type() == CvType::type()); CV_Assert(dataset.isContinuous()); - ::cvflann::Matrix m_dataset((ElementType*)dataset.ptr(0), dataset.rows, dataset.cols); + ::cvflann::Matrix m_dataset((ElementType*)_dataset.ptr(0), _dataset.rows, _dataset.cols); nnIndex = new ::cvflann::Index(m_dataset, params, distance); @@ -332,168 +397,137 @@ int GenericIndex::radiusSearch(const Mat& query, Mat& indices, Mat& di return nnIndex->radiusSearch(m_query,m_indices,m_dists,radius,searchParams); } -//! @endcond - /** * @deprecated Use GenericIndex class instead */ template -class -#ifndef _MSC_VER - FLANN_DEPRECATED -#endif - Index_ { +class Index_ +{ public: - typedef typename L2::ElementType ElementType; - typedef typename L2::ResultType DistanceType; - - Index_(const Mat& features, const ::cvflann::IndexParams& params); - - ~Index_(); + typedef typename L2::ElementType ElementType; + typedef typename L2::ResultType DistanceType; - void knnSearch(const std::vector& query, std::vector& indices, std::vector& dists, int knn, const ::cvflann::SearchParams& params); - void knnSearch(const Mat& queries, Mat& indices, Mat& dists, int knn, const ::cvflann::SearchParams& params); - - int radiusSearch(const std::vector& query, std::vector& indices, std::vector& dists, DistanceType radius, const ::cvflann::SearchParams& params); - int radiusSearch(const Mat& query, Mat& indices, Mat& dists, DistanceType radius, const ::cvflann::SearchParams& params); - - void save(String filename) - { - if (nnIndex_L1) nnIndex_L1->save(filename); - if (nnIndex_L2) nnIndex_L2->save(filename); - } - - int veclen() const + CV_DEPRECATED Index_(const Mat& dataset, const ::cvflann::IndexParams& params) { - if (nnIndex_L1) return nnIndex_L1->veclen(); - if (nnIndex_L2) return nnIndex_L2->veclen(); - } + printf("[WARNING] The cv::flann::Index_ class is deperecated, use cv::flann::GenericIndex instead\n"); - int size() const - { - if (nnIndex_L1) return nnIndex_L1->size(); - if (nnIndex_L2) return nnIndex_L2->size(); - } - - ::cvflann::IndexParams getParameters() - { - if (nnIndex_L1) return nnIndex_L1->getParameters(); - if (nnIndex_L2) return nnIndex_L2->getParameters(); + CV_Assert(dataset.type() == CvType::type()); + CV_Assert(dataset.isContinuous()); + ::cvflann::Matrix m_dataset((ElementType*)dataset.ptr(0), dataset.rows, dataset.cols); + if ( ::cvflann::flann_distance_type() == cvflann::FLANN_DIST_L2 ) { + nnIndex_L1 = NULL; + nnIndex_L2 = new ::cvflann::Index< L2 >(m_dataset, params); } - - FLANN_DEPRECATED const ::cvflann::IndexParams* getIndexParameters() - { - if (nnIndex_L1) return nnIndex_L1->getIndexParameters(); - if (nnIndex_L2) return nnIndex_L2->getIndexParameters(); + else if ( ::cvflann::flann_distance_type() == cvflann::FLANN_DIST_L1 ) { + nnIndex_L1 = new ::cvflann::Index< L1 >(m_dataset, params); + nnIndex_L2 = NULL; } + else { + printf("[ERROR] cv::flann::Index_ only provides backwards compatibility for the L1 and L2 distances. " + "For other distance types you must use cv::flann::GenericIndex\n"); + CV_Assert(0); + } + if (nnIndex_L1) nnIndex_L1->buildIndex(); + if (nnIndex_L2) nnIndex_L2->buildIndex(); + } + CV_DEPRECATED ~Index_() + { + if (nnIndex_L1) delete nnIndex_L1; + if (nnIndex_L2) delete nnIndex_L2; + } -private: - // providing backwards compatibility for L2 and L1 distances (most common) - ::cvflann::Index< L2 >* nnIndex_L2; - ::cvflann::Index< L1 >* nnIndex_L1; -}; - -#ifdef _MSC_VER -template -class FLANN_DEPRECATED Index_; -#endif + CV_DEPRECATED void knnSearch(const std::vector& query, std::vector& indices, std::vector& dists, int knn, const ::cvflann::SearchParams& searchParams) + { + ::cvflann::Matrix m_query((ElementType*)&query[0], 1, query.size()); + ::cvflann::Matrix m_indices(&indices[0], 1, indices.size()); + ::cvflann::Matrix m_dists(&dists[0], 1, dists.size()); -//! @cond IGNORED + if (nnIndex_L1) nnIndex_L1->knnSearch(m_query,m_indices,m_dists,knn,searchParams); + if (nnIndex_L2) nnIndex_L2->knnSearch(m_query,m_indices,m_dists,knn,searchParams); + } + CV_DEPRECATED void knnSearch(const Mat& queries, Mat& indices, Mat& dists, int knn, const ::cvflann::SearchParams& searchParams) + { + CV_Assert(queries.type() == CvType::type()); + CV_Assert(queries.isContinuous()); + ::cvflann::Matrix m_queries((ElementType*)queries.ptr(0), queries.rows, queries.cols); -template -Index_::Index_(const Mat& dataset, const ::cvflann::IndexParams& params) -{ - printf("[WARNING] The cv::flann::Index_ class is deperecated, use cv::flann::GenericIndex instead\n"); + CV_Assert(indices.type() == CV_32S); + CV_Assert(indices.isContinuous()); + ::cvflann::Matrix m_indices((int*)indices.ptr(0), indices.rows, indices.cols); - CV_Assert(dataset.type() == CvType::type()); - CV_Assert(dataset.isContinuous()); - ::cvflann::Matrix m_dataset((ElementType*)dataset.ptr(0), dataset.rows, dataset.cols); + CV_Assert(dists.type() == CvType::type()); + CV_Assert(dists.isContinuous()); + ::cvflann::Matrix m_dists((DistanceType*)dists.ptr(0), dists.rows, dists.cols); - if ( ::cvflann::flann_distance_type() == cvflann::FLANN_DIST_L2 ) { - nnIndex_L1 = NULL; - nnIndex_L2 = new ::cvflann::Index< L2 >(m_dataset, params); + if (nnIndex_L1) nnIndex_L1->knnSearch(m_queries,m_indices,m_dists,knn, searchParams); + if (nnIndex_L2) nnIndex_L2->knnSearch(m_queries,m_indices,m_dists,knn, searchParams); } - else if ( ::cvflann::flann_distance_type() == cvflann::FLANN_DIST_L1 ) { - nnIndex_L1 = new ::cvflann::Index< L1 >(m_dataset, params); - nnIndex_L2 = NULL; - } - else { - printf("[ERROR] cv::flann::Index_ only provides backwards compatibility for the L1 and L2 distances. " - "For other distance types you must use cv::flann::GenericIndex\n"); - CV_Assert(0); - } - if (nnIndex_L1) nnIndex_L1->buildIndex(); - if (nnIndex_L2) nnIndex_L2->buildIndex(); -} -template -Index_::~Index_() -{ - if (nnIndex_L1) delete nnIndex_L1; - if (nnIndex_L2) delete nnIndex_L2; -} - -template -void Index_::knnSearch(const std::vector& query, std::vector& indices, std::vector& dists, int knn, const ::cvflann::SearchParams& searchParams) -{ - ::cvflann::Matrix m_query((ElementType*)&query[0], 1, query.size()); - ::cvflann::Matrix m_indices(&indices[0], 1, indices.size()); - ::cvflann::Matrix m_dists(&dists[0], 1, dists.size()); + CV_DEPRECATED int radiusSearch(const std::vector& query, std::vector& indices, std::vector& dists, DistanceType radius, const ::cvflann::SearchParams& searchParams) + { + ::cvflann::Matrix m_query((ElementType*)&query[0], 1, query.size()); + ::cvflann::Matrix m_indices(&indices[0], 1, indices.size()); + ::cvflann::Matrix m_dists(&dists[0], 1, dists.size()); - if (nnIndex_L1) nnIndex_L1->knnSearch(m_query,m_indices,m_dists,knn,searchParams); - if (nnIndex_L2) nnIndex_L2->knnSearch(m_query,m_indices,m_dists,knn,searchParams); -} + if (nnIndex_L1) return nnIndex_L1->radiusSearch(m_query,m_indices,m_dists,radius,searchParams); + if (nnIndex_L2) return nnIndex_L2->radiusSearch(m_query,m_indices,m_dists,radius,searchParams); + } + CV_DEPRECATED int radiusSearch(const Mat& query, Mat& indices, Mat& dists, DistanceType radius, const ::cvflann::SearchParams& searchParams) + { + CV_Assert(query.type() == CvType::type()); + CV_Assert(query.isContinuous()); + ::cvflann::Matrix m_query((ElementType*)query.ptr(0), query.rows, query.cols); -template -void Index_::knnSearch(const Mat& queries, Mat& indices, Mat& dists, int knn, const ::cvflann::SearchParams& searchParams) -{ - CV_Assert(queries.type() == CvType::type()); - CV_Assert(queries.isContinuous()); - ::cvflann::Matrix m_queries((ElementType*)queries.ptr(0), queries.rows, queries.cols); + CV_Assert(indices.type() == CV_32S); + CV_Assert(indices.isContinuous()); + ::cvflann::Matrix m_indices((int*)indices.ptr(0), indices.rows, indices.cols); - CV_Assert(indices.type() == CV_32S); - CV_Assert(indices.isContinuous()); - ::cvflann::Matrix m_indices((int*)indices.ptr(0), indices.rows, indices.cols); + CV_Assert(dists.type() == CvType::type()); + CV_Assert(dists.isContinuous()); + ::cvflann::Matrix m_dists((DistanceType*)dists.ptr(0), dists.rows, dists.cols); - CV_Assert(dists.type() == CvType::type()); - CV_Assert(dists.isContinuous()); - ::cvflann::Matrix m_dists((DistanceType*)dists.ptr(0), dists.rows, dists.cols); + if (nnIndex_L1) return nnIndex_L1->radiusSearch(m_query,m_indices,m_dists,radius,searchParams); + if (nnIndex_L2) return nnIndex_L2->radiusSearch(m_query,m_indices,m_dists,radius,searchParams); + } - if (nnIndex_L1) nnIndex_L1->knnSearch(m_queries,m_indices,m_dists,knn, searchParams); - if (nnIndex_L2) nnIndex_L2->knnSearch(m_queries,m_indices,m_dists,knn, searchParams); -} + CV_DEPRECATED void save(String filename) + { + if (nnIndex_L1) nnIndex_L1->save(filename); + if (nnIndex_L2) nnIndex_L2->save(filename); + } -template -int Index_::radiusSearch(const std::vector& query, std::vector& indices, std::vector& dists, DistanceType radius, const ::cvflann::SearchParams& searchParams) -{ - ::cvflann::Matrix m_query((ElementType*)&query[0], 1, query.size()); - ::cvflann::Matrix m_indices(&indices[0], 1, indices.size()); - ::cvflann::Matrix m_dists(&dists[0], 1, dists.size()); + CV_DEPRECATED int veclen() const + { + if (nnIndex_L1) return nnIndex_L1->veclen(); + if (nnIndex_L2) return nnIndex_L2->veclen(); + } - if (nnIndex_L1) return nnIndex_L1->radiusSearch(m_query,m_indices,m_dists,radius,searchParams); - if (nnIndex_L2) return nnIndex_L2->radiusSearch(m_query,m_indices,m_dists,radius,searchParams); -} + CV_DEPRECATED int size() const + { + if (nnIndex_L1) return nnIndex_L1->size(); + if (nnIndex_L2) return nnIndex_L2->size(); + } -template -int Index_::radiusSearch(const Mat& query, Mat& indices, Mat& dists, DistanceType radius, const ::cvflann::SearchParams& searchParams) -{ - CV_Assert(query.type() == CvType::type()); - CV_Assert(query.isContinuous()); - ::cvflann::Matrix m_query((ElementType*)query.ptr(0), query.rows, query.cols); + CV_DEPRECATED ::cvflann::IndexParams getParameters() + { + if (nnIndex_L1) return nnIndex_L1->getParameters(); + if (nnIndex_L2) return nnIndex_L2->getParameters(); - CV_Assert(indices.type() == CV_32S); - CV_Assert(indices.isContinuous()); - ::cvflann::Matrix m_indices((int*)indices.ptr(0), indices.rows, indices.cols); + } - CV_Assert(dists.type() == CvType::type()); - CV_Assert(dists.isContinuous()); - ::cvflann::Matrix m_dists((DistanceType*)dists.ptr(0), dists.rows, dists.cols); + CV_DEPRECATED const ::cvflann::IndexParams* getIndexParameters() + { + if (nnIndex_L1) return nnIndex_L1->getIndexParameters(); + if (nnIndex_L2) return nnIndex_L2->getIndexParameters(); + } - if (nnIndex_L1) return nnIndex_L1->radiusSearch(m_query,m_indices,m_dists,radius,searchParams); - if (nnIndex_L2) return nnIndex_L2->radiusSearch(m_query,m_indices,m_dists,radius,searchParams); -} +private: + // providing backwards compatibility for L2 and L1 distances (most common) + ::cvflann::Index< L2 >* nnIndex_L2; + ::cvflann::Index< L1 >* nnIndex_L1; +}; //! @endcond @@ -532,10 +566,10 @@ int hierarchicalClustering(const Mat& features, Mat& centers, const ::cvflann::K return ::cvflann::hierarchicalClustering(m_features, m_centers, params, d); } -/** @deprecated -*/ +//! @cond IGNORED + template -FLANN_DEPRECATED int hierarchicalClustering(const Mat& features, Mat& centers, const ::cvflann::KMeansIndexParams& params) +CV_DEPRECATED int hierarchicalClustering(const Mat& features, Mat& centers, const ::cvflann::KMeansIndexParams& params) { printf("[WARNING] cv::flann::hierarchicalClustering is deprecated, use " "cv::flann::hierarchicalClustering instead\n"); @@ -554,6 +588,8 @@ FLANN_DEPRECATED int hierarchicalClustering(const Mat& features, Mat& centers, c } } +//! @endcond + //! @} flann } } // namespace cv::flann diff --git a/IPL/include/opencv/opencv2/flann/all_indices.h b/IPL/include/opencv/opencv2/flann/all_indices.h index ff53fd8..ba5a2f2 100644 --- a/IPL/include/opencv/opencv2/flann/all_indices.h +++ b/IPL/include/opencv/opencv2/flann/all_indices.h @@ -30,6 +30,8 @@ #ifndef OPENCV_FLANN_ALL_INDICES_H_ #define OPENCV_FLANN_ALL_INDICES_H_ +//! @cond IGNORED + #include "general.h" #include "nn_index.h" @@ -152,4 +154,6 @@ NNIndex* create_index_by_type(const Matrix #include @@ -97,6 +99,7 @@ class PooledAllocator blocksize = blockSize; remaining = 0; base = NULL; + loc = NULL; usedMemory = 0; wastedMemory = 0; @@ -181,8 +184,13 @@ class PooledAllocator return mem; } +private: + PooledAllocator(const PooledAllocator &); // copy disabled + PooledAllocator& operator=(const PooledAllocator &); // assign disabled }; } +//! @endcond + #endif //OPENCV_FLANN_ALLOCATOR_H_ diff --git a/IPL/include/opencv/opencv2/flann/any.h b/IPL/include/opencv/opencv2/flann/any.h index bfe06c8..f5684e9 100644 --- a/IPL/include/opencv/opencv2/flann/any.h +++ b/IPL/include/opencv/opencv2/flann/any.h @@ -12,6 +12,8 @@ * Adapted for FLANN by Marius Muja */ +//! @cond IGNORED + #include "defines.h" #include #include @@ -54,50 +56,50 @@ struct base_any_policy template struct typed_base_any_policy : base_any_policy { - virtual ::size_t get_size() { return sizeof(T); } - virtual const std::type_info& type() { return typeid(T); } + virtual ::size_t get_size() CV_OVERRIDE { return sizeof(T); } + virtual const std::type_info& type() CV_OVERRIDE { return typeid(T); } }; template -struct small_any_policy : typed_base_any_policy +struct small_any_policy CV_FINAL : typed_base_any_policy { - virtual void static_delete(void**) { } - virtual void copy_from_value(void const* src, void** dest) + virtual void static_delete(void**) CV_OVERRIDE { } + virtual void copy_from_value(void const* src, void** dest) CV_OVERRIDE { new (dest) T(* reinterpret_cast(src)); } - virtual void clone(void* const* src, void** dest) { *dest = *src; } - virtual void move(void* const* src, void** dest) { *dest = *src; } - virtual void* get_value(void** src) { return reinterpret_cast(src); } - virtual const void* get_value(void* const * src) { return reinterpret_cast(src); } - virtual void print(std::ostream& out, void* const* src) { out << *reinterpret_cast(src); } + virtual void clone(void* const* src, void** dest) CV_OVERRIDE { *dest = *src; } + virtual void move(void* const* src, void** dest) CV_OVERRIDE { *dest = *src; } + virtual void* get_value(void** src) CV_OVERRIDE { return reinterpret_cast(src); } + virtual const void* get_value(void* const * src) CV_OVERRIDE { return reinterpret_cast(src); } + virtual void print(std::ostream& out, void* const* src) CV_OVERRIDE { out << *reinterpret_cast(src); } }; template -struct big_any_policy : typed_base_any_policy +struct big_any_policy CV_FINAL : typed_base_any_policy { - virtual void static_delete(void** x) + virtual void static_delete(void** x) CV_OVERRIDE { if (* x) delete (* reinterpret_cast(x)); *x = NULL; } - virtual void copy_from_value(void const* src, void** dest) + virtual void copy_from_value(void const* src, void** dest) CV_OVERRIDE { *dest = new T(*reinterpret_cast(src)); } - virtual void clone(void* const* src, void** dest) + virtual void clone(void* const* src, void** dest) CV_OVERRIDE { *dest = new T(**reinterpret_cast(src)); } - virtual void move(void* const* src, void** dest) + virtual void move(void* const* src, void** dest) CV_OVERRIDE { (*reinterpret_cast(dest))->~T(); **reinterpret_cast(dest) = **reinterpret_cast(src); } - virtual void* get_value(void** src) { return *src; } - virtual const void* get_value(void* const * src) { return *src; } - virtual void print(std::ostream& out, void* const* src) { out << *reinterpret_cast(*src); } + virtual void* get_value(void** src) CV_OVERRIDE { return *src; } + virtual const void* get_value(void* const * src) CV_OVERRIDE { return *src; } + virtual void print(std::ostream& out, void* const* src) CV_OVERRIDE { out << *reinterpret_cast(*src); } }; template<> inline void big_any_policy::print(std::ostream& out, void* const* src) @@ -246,6 +248,12 @@ struct any return assign(x); } + /// Assignment operator. Template-based version above doesn't work as expected. We need regular assignment operator here. + any& operator=(const any& x) + { + return assign(x); + } + /// Assignment operator, specialed for literal strings. /// They have types like const char [6] which don't work as expected. any& operator=(const char* x) @@ -321,4 +329,6 @@ inline std::ostream& operator <<(std::ostream& out, const any& any_val) } +//! @endcond + #endif // OPENCV_FLANN_ANY_H_ diff --git a/IPL/include/opencv/opencv2/flann/autotuned_index.h b/IPL/include/opencv/opencv2/flann/autotuned_index.h index 6ffb929..eb4554f 100644 --- a/IPL/include/opencv/opencv2/flann/autotuned_index.h +++ b/IPL/include/opencv/opencv2/flann/autotuned_index.h @@ -30,6 +30,10 @@ #ifndef OPENCV_FLANN_AUTOTUNED_INDEX_H_ #define OPENCV_FLANN_AUTOTUNED_INDEX_H_ +//! @cond IGNORED + +#include + #include "general.h" #include "nn_index.h" #include "ground_truth.h" @@ -81,6 +85,7 @@ class AutotunedIndex : public NNIndex memory_weight_ = get_param(params, "memory_weight", 0.0f); sample_fraction_ = get_param(params,"sample_fraction", 0.1f); bestIndex_ = NULL; + speedup_ = 0; } AutotunedIndex(const AutotunedIndex&); @@ -97,7 +102,7 @@ class AutotunedIndex : public NNIndex /** * Method responsible with building the index. */ - virtual void buildIndex() + virtual void buildIndex() CV_OVERRIDE { std::ostringstream stream; bestParams_ = estimateBuildParams(); @@ -121,7 +126,7 @@ class AutotunedIndex : public NNIndex /** * Saves the index to a stream */ - virtual void saveIndex(FILE* stream) + virtual void saveIndex(FILE* stream) CV_OVERRIDE { save_value(stream, (int)bestIndex_->getType()); bestIndex_->saveIndex(stream); @@ -131,7 +136,7 @@ class AutotunedIndex : public NNIndex /** * Loads the index from a stream */ - virtual void loadIndex(FILE* stream) + virtual void loadIndex(FILE* stream) CV_OVERRIDE { int index_type; @@ -148,7 +153,7 @@ class AutotunedIndex : public NNIndex /** * Method that searches for nearest-neighbors */ - virtual void findNeighbors(ResultSet& result, const ElementType* vec, const SearchParams& searchParams) + virtual void findNeighbors(ResultSet& result, const ElementType* vec, const SearchParams& searchParams) CV_OVERRIDE { int checks = get_param(searchParams,"checks",FLANN_CHECKS_AUTOTUNED); if (checks == FLANN_CHECKS_AUTOTUNED) { @@ -160,7 +165,7 @@ class AutotunedIndex : public NNIndex } - IndexParams getParameters() const + IndexParams getParameters() const CV_OVERRIDE { return bestIndex_->getParameters(); } @@ -179,7 +184,7 @@ class AutotunedIndex : public NNIndex /** * Number of features in this index. */ - virtual size_t size() const + virtual size_t size() const CV_OVERRIDE { return bestIndex_->size(); } @@ -187,7 +192,7 @@ class AutotunedIndex : public NNIndex /** * The length of each vector in this index. */ - virtual size_t veclen() const + virtual size_t veclen() const CV_OVERRIDE { return bestIndex_->veclen(); } @@ -195,7 +200,7 @@ class AutotunedIndex : public NNIndex /** * The amount of memory (in bytes) this index uses. */ - virtual int usedMemory() const + virtual int usedMemory() const CV_OVERRIDE { return bestIndex_->usedMemory(); } @@ -203,7 +208,7 @@ class AutotunedIndex : public NNIndex /** * Algorithm name */ - virtual flann_algorithm_t getType() const + virtual flann_algorithm_t getType() const CV_OVERRIDE { return FLANN_INDEX_AUTOTUNED; } @@ -585,4 +590,6 @@ class AutotunedIndex : public NNIndex }; } +//! @endcond + #endif /* OPENCV_FLANN_AUTOTUNED_INDEX_H_ */ diff --git a/IPL/include/opencv/opencv2/flann/composite_index.h b/IPL/include/opencv/opencv2/flann/composite_index.h index 527ca1a..bcf0827 100644 --- a/IPL/include/opencv/opencv2/flann/composite_index.h +++ b/IPL/include/opencv/opencv2/flann/composite_index.h @@ -31,6 +31,8 @@ #ifndef OPENCV_FLANN_COMPOSITE_INDEX_H_ #define OPENCV_FLANN_COMPOSITE_INDEX_H_ +//! @cond IGNORED + #include "general.h" #include "nn_index.h" #include "kdtree_index.h" @@ -101,7 +103,7 @@ class CompositeIndex : public NNIndex /** * @return The index type */ - flann_algorithm_t getType() const + flann_algorithm_t getType() const CV_OVERRIDE { return FLANN_INDEX_COMPOSITE; } @@ -109,7 +111,7 @@ class CompositeIndex : public NNIndex /** * @return Size of the index */ - size_t size() const + size_t size() const CV_OVERRIDE { return kdtree_index_->size(); } @@ -117,7 +119,7 @@ class CompositeIndex : public NNIndex /** * \returns The dimensionality of the features in this index. */ - size_t veclen() const + size_t veclen() const CV_OVERRIDE { return kdtree_index_->veclen(); } @@ -125,7 +127,7 @@ class CompositeIndex : public NNIndex /** * \returns The amount of memory (in bytes) used by the index. */ - int usedMemory() const + int usedMemory() const CV_OVERRIDE { return kmeans_index_->usedMemory() + kdtree_index_->usedMemory(); } @@ -133,7 +135,7 @@ class CompositeIndex : public NNIndex /** * \brief Builds the index */ - void buildIndex() + void buildIndex() CV_OVERRIDE { Logger::info("Building kmeans tree...\n"); kmeans_index_->buildIndex(); @@ -145,7 +147,7 @@ class CompositeIndex : public NNIndex * \brief Saves the index to a stream * \param stream The stream to save the index to */ - void saveIndex(FILE* stream) + void saveIndex(FILE* stream) CV_OVERRIDE { kmeans_index_->saveIndex(stream); kdtree_index_->saveIndex(stream); @@ -155,7 +157,7 @@ class CompositeIndex : public NNIndex * \brief Loads the index from a stream * \param stream The stream from which the index is loaded */ - void loadIndex(FILE* stream) + void loadIndex(FILE* stream) CV_OVERRIDE { kmeans_index_->loadIndex(stream); kdtree_index_->loadIndex(stream); @@ -164,7 +166,7 @@ class CompositeIndex : public NNIndex /** * \returns The index parameters */ - IndexParams getParameters() const + IndexParams getParameters() const CV_OVERRIDE { return index_params_; } @@ -172,7 +174,7 @@ class CompositeIndex : public NNIndex /** * \brief Method that searches for nearest-neighbours */ - void findNeighbors(ResultSet& result, const ElementType* vec, const SearchParams& searchParams) + void findNeighbors(ResultSet& result, const ElementType* vec, const SearchParams& searchParams) CV_OVERRIDE { kmeans_index_->findNeighbors(result, vec, searchParams); kdtree_index_->findNeighbors(result, vec, searchParams); @@ -191,4 +193,6 @@ class CompositeIndex : public NNIndex } +//! @endcond + #endif //OPENCV_FLANN_COMPOSITE_INDEX_H_ diff --git a/IPL/include/opencv/opencv2/flann/config.h b/IPL/include/opencv/opencv2/flann/config.h index 56832fd..c9342c0 100644 --- a/IPL/include/opencv/opencv2/flann/config.h +++ b/IPL/include/opencv/opencv2/flann/config.h @@ -30,9 +30,13 @@ #ifndef OPENCV_FLANN_CONFIG_H_ #define OPENCV_FLANN_CONFIG_H_ +//! @cond IGNORED + #ifdef FLANN_VERSION_ #undef FLANN_VERSION_ #endif #define FLANN_VERSION_ "1.6.10" +//! @endcond + #endif /* OPENCV_FLANN_CONFIG_H_ */ diff --git a/IPL/include/opencv/opencv2/flann/defines.h b/IPL/include/opencv/opencv2/flann/defines.h index f0264f7..884c600 100644 --- a/IPL/include/opencv/opencv2/flann/defines.h +++ b/IPL/include/opencv/opencv2/flann/defines.h @@ -30,12 +30,14 @@ #ifndef OPENCV_FLANN_DEFINES_H_ #define OPENCV_FLANN_DEFINES_H_ +//! @cond IGNORED + #include "config.h" #ifdef FLANN_EXPORT #undef FLANN_EXPORT #endif -#ifdef WIN32 +#ifdef _WIN32 /* win32 dll export/import directives */ #ifdef FLANN_EXPORTS #define FLANN_EXPORT __declspec(dllexport) @@ -50,19 +52,6 @@ #endif -#ifdef FLANN_DEPRECATED -#undef FLANN_DEPRECATED -#endif -#ifdef __GNUC__ -#define FLANN_DEPRECATED __attribute__ ((deprecated)) -#elif defined(_MSC_VER) -#define FLANN_DEPRECATED __declspec(deprecated) -#else -#pragma message("WARNING: You need to implement FLANN_DEPRECATED for this compiler") -#define FLANN_DEPRECATED -#endif - - #undef FLANN_PLATFORM_32_BIT #undef FLANN_PLATFORM_64_BIT #if defined __amd64__ || defined __x86_64__ || defined _WIN64 || defined _M_X64 @@ -174,4 +163,6 @@ enum } +//! @endcond + #endif /* OPENCV_FLANN_DEFINES_H_ */ diff --git a/IPL/include/opencv/opencv2/flann/dist.h b/IPL/include/opencv/opencv2/flann/dist.h index 9dbe527..07a1cc2 100644 --- a/IPL/include/opencv/opencv2/flann/dist.h +++ b/IPL/include/opencv/opencv2/flann/dist.h @@ -31,6 +31,8 @@ #ifndef OPENCV_FLANN_DIST_H_ #define OPENCV_FLANN_DIST_H_ +//! @cond IGNORED + #include #include #include @@ -43,11 +45,11 @@ typedef unsigned __int64 uint64_t; #include "defines.h" -#if (defined WIN32 || defined _WIN32) && defined(_M_ARM) +#if defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64)) # include #endif -#ifdef __ARM_NEON__ +#if defined(__ARM_NEON__) && !defined(__CUDACC__) # include "arm_neon.h" #endif @@ -114,7 +116,7 @@ struct L2_Simple ResultType result = ResultType(); ResultType diff; for(size_t i = 0; i < size; ++i ) { - diff = *a++ - *b++; + diff = (ResultType)(*a++ - *b++); result += diff*diff; } return result; @@ -425,7 +427,7 @@ struct Hamming ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const { ResultType result = 0; -#ifdef __ARM_NEON__ +#if defined(__ARM_NEON__) && !defined(__CUDACC__) { uint32x4_t bits = vmovq_n_u32(0); for (size_t i = 0; i < size; i += 16) { @@ -441,7 +443,7 @@ struct Hamming result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0); result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2); } -#elif __GNUC__ +#elif defined(__GNUC__) { //for portability just use unsigned long -- and use the __builtin_popcountll (see docs for __builtin_popcountll) typedef unsigned long long pop_t; @@ -462,10 +464,9 @@ struct Hamming } } #else // NO NEON and NOT GNUC - typedef unsigned long long pop_t; HammingLUT lut; result = lut(reinterpret_cast (a), - reinterpret_cast (b), size * sizeof(pop_t)); + reinterpret_cast (b), size); #endif return result; } @@ -698,7 +699,7 @@ struct KL_Divergence typedef typename Accumulator::Type ResultType; /** - * Compute the Kullback–Leibler divergence + * Compute the Kullback-Leibler divergence */ template ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType worst_dist = -1) const @@ -843,7 +844,7 @@ typename Distance::ResultType ensureSquareDistance( typename Distance::ResultTyp /* * ...and a template to ensure the user that he will process the normal distance, - * and not squared distance, without loosing processing time calling sqrt(ensureSquareDistance) + * and not squared distance, without losing processing time calling sqrt(ensureSquareDistance) * that will result in doing actually sqrt(dist*dist) for L1 distance for instance. */ template @@ -902,4 +903,6 @@ typename Distance::ResultType ensureSimpleDistance( typename Distance::ResultTyp } +//! @endcond + #endif //OPENCV_FLANN_DIST_H_ diff --git a/IPL/include/opencv/opencv2/flann/dummy.h b/IPL/include/opencv/opencv2/flann/dummy.h index 26bd3fa..c176f2e 100644 --- a/IPL/include/opencv/opencv2/flann/dummy.h +++ b/IPL/include/opencv/opencv2/flann/dummy.h @@ -2,15 +2,15 @@ #ifndef OPENCV_FLANN_DUMMY_H_ #define OPENCV_FLANN_DUMMY_H_ +//! @cond IGNORED + namespace cvflann { -#if (defined WIN32 || defined _WIN32 || defined WINCE) && defined CVAPI_EXPORTS -__declspec(dllexport) -#endif -void dummyfunc(); +CV_DEPRECATED inline void dummyfunc() {} } +//! @endcond #endif /* OPENCV_FLANN_DUMMY_H_ */ diff --git a/IPL/include/opencv/opencv2/flann/dynamic_bitset.h b/IPL/include/opencv/opencv2/flann/dynamic_bitset.h index d795b5d..a00ce1b 100644 --- a/IPL/include/opencv/opencv2/flann/dynamic_bitset.h +++ b/IPL/include/opencv/opencv2/flann/dynamic_bitset.h @@ -35,6 +35,8 @@ #ifndef OPENCV_FLANN_DYNAMIC_BITSET_H_ #define OPENCV_FLANN_DYNAMIC_BITSET_H_ +//! @cond IGNORED + #ifndef FLANN_USE_BOOST # define FLANN_USE_BOOST 0 #endif @@ -59,7 +61,7 @@ class DynamicBitset public: /** default constructor */ - DynamicBitset() + DynamicBitset() : size_(0) { } @@ -156,4 +158,6 @@ class DynamicBitset #endif +//! @endcond + #endif // OPENCV_FLANN_DYNAMIC_BITSET_H_ diff --git a/IPL/include/opencv/opencv2/flann/flann_base.hpp b/IPL/include/opencv/opencv2/flann/flann_base.hpp index 98c33cf..83606d2 100644 --- a/IPL/include/opencv/opencv2/flann/flann_base.hpp +++ b/IPL/include/opencv/opencv2/flann/flann_base.hpp @@ -31,6 +31,8 @@ #ifndef OPENCV_FLANN_BASE_HPP_ #define OPENCV_FLANN_BASE_HPP_ +//! @cond IGNORED + #include #include #include @@ -80,9 +82,11 @@ NNIndex* load_saved_index(const Matrix } IndexHeader header = load_header(fin); if (header.data_type != Datatype::type()) { + fclose(fin); throw FLANNException("Datatype of saved index is different than of the one to be created."); } if ((size_t(header.rows) != dataset.rows)||(size_t(header.cols) != dataset.cols)) { + fclose(fin); throw FLANNException("The index saved belongs to a different dataset"); } @@ -126,7 +130,7 @@ class Index : public NNIndex /** * Builds the index. */ - void buildIndex() + void buildIndex() CV_OVERRIDE { if (!loaded_) { nnIndex_->buildIndex(); @@ -148,7 +152,7 @@ class Index : public NNIndex * \brief Saves the index to a stream * \param stream The stream to save the index to */ - virtual void saveIndex(FILE* stream) + virtual void saveIndex(FILE* stream) CV_OVERRIDE { nnIndex_->saveIndex(stream); } @@ -157,7 +161,7 @@ class Index : public NNIndex * \brief Loads the index from a stream * \param stream The stream from which the index is loaded */ - virtual void loadIndex(FILE* stream) + virtual void loadIndex(FILE* stream) CV_OVERRIDE { nnIndex_->loadIndex(stream); } @@ -165,7 +169,7 @@ class Index : public NNIndex /** * \returns number of features in this index. */ - size_t veclen() const + size_t veclen() const CV_OVERRIDE { return nnIndex_->veclen(); } @@ -173,7 +177,7 @@ class Index : public NNIndex /** * \returns The dimensionality of the features in this index. */ - size_t size() const + size_t size() const CV_OVERRIDE { return nnIndex_->size(); } @@ -181,7 +185,7 @@ class Index : public NNIndex /** * \returns The index type (kdtree, kmeans,...) */ - flann_algorithm_t getType() const + flann_algorithm_t getType() const CV_OVERRIDE { return nnIndex_->getType(); } @@ -189,7 +193,7 @@ class Index : public NNIndex /** * \returns The amount of memory (in bytes) used by the index. */ - virtual int usedMemory() const + virtual int usedMemory() const CV_OVERRIDE { return nnIndex_->usedMemory(); } @@ -198,7 +202,7 @@ class Index : public NNIndex /** * \returns The index parameters */ - IndexParams getParameters() const + IndexParams getParameters() const CV_OVERRIDE { return nnIndex_->getParameters(); } @@ -211,7 +215,7 @@ class Index : public NNIndex * \param[in] knn Number of nearest neighbors to return * \param[in] params Search parameters */ - void knnSearch(const Matrix& queries, Matrix& indices, Matrix& dists, int knn, const SearchParams& params) + void knnSearch(const Matrix& queries, Matrix& indices, Matrix& dists, int knn, const SearchParams& params) CV_OVERRIDE { nnIndex_->knnSearch(queries, indices, dists, knn, params); } @@ -225,7 +229,7 @@ class Index : public NNIndex * \param[in] params Search parameters * \returns Number of neighbors found */ - int radiusSearch(const Matrix& query, Matrix& indices, Matrix& dists, float radius, const SearchParams& params) + int radiusSearch(const Matrix& query, Matrix& indices, Matrix& dists, float radius, const SearchParams& params) CV_OVERRIDE { return nnIndex_->radiusSearch(query, indices, dists, radius, params); } @@ -233,7 +237,7 @@ class Index : public NNIndex /** * \brief Method that searches for nearest-neighbours */ - void findNeighbors(ResultSet& result, const ElementType* vec, const SearchParams& searchParams) + void findNeighbors(ResultSet& result, const ElementType* vec, const SearchParams& searchParams) CV_OVERRIDE { nnIndex_->findNeighbors(result, vec, searchParams); } @@ -241,7 +245,7 @@ class Index : public NNIndex /** * \brief Returns actual index */ - FLANN_DEPRECATED NNIndex* getIndex() + CV_DEPRECATED NNIndex* getIndex() { return nnIndex_; } @@ -250,7 +254,7 @@ class Index : public NNIndex * \brief Returns index parameters. * \deprecated use getParameters() instead. */ - FLANN_DEPRECATED const IndexParams* getIndexParameters() + CV_DEPRECATED const IndexParams* getIndexParameters() { return &index_params_; } @@ -262,6 +266,9 @@ class Index : public NNIndex bool loaded_; /** Parameters passed to the index */ IndexParams index_params_; + + Index(const Index &); // copy disabled + Index& operator=(const Index &); // assign disabled }; /** @@ -287,4 +294,7 @@ int hierarchicalClustering(const Matrix& points, } } + +//! @endcond + #endif /* OPENCV_FLANN_BASE_HPP_ */ diff --git a/IPL/include/opencv/opencv2/flann/general.h b/IPL/include/opencv/opencv2/flann/general.h index 9d5402a..ac848d6 100644 --- a/IPL/include/opencv/opencv2/flann/general.h +++ b/IPL/include/opencv/opencv2/flann/general.h @@ -31,6 +31,8 @@ #ifndef OPENCV_FLANN_GENERAL_H_ #define OPENCV_FLANN_GENERAL_H_ +//! @cond IGNORED + #include "opencv2/core.hpp" namespace cvflann @@ -46,5 +48,6 @@ class FLANNException : public cv::Exception } +//! @endcond #endif /* OPENCV_FLANN_GENERAL_H_ */ diff --git a/IPL/include/opencv/opencv2/flann/ground_truth.h b/IPL/include/opencv/opencv2/flann/ground_truth.h index fd8f3ae..17f2a8e 100644 --- a/IPL/include/opencv/opencv2/flann/ground_truth.h +++ b/IPL/include/opencv/opencv2/flann/ground_truth.h @@ -31,6 +31,8 @@ #ifndef OPENCV_FLANN_GROUND_TRUTH_H_ #define OPENCV_FLANN_GROUND_TRUTH_H_ +//! @cond IGNORED + #include "dist.h" #include "matrix.h" @@ -91,4 +93,6 @@ void compute_ground_truth(const Matrix& dataset, } +//! @endcond + #endif //OPENCV_FLANN_GROUND_TRUTH_H_ diff --git a/IPL/include/opencv/opencv2/flann/hdf5.h b/IPL/include/opencv/opencv2/flann/hdf5.h index 80d23b9..7554384 100644 --- a/IPL/include/opencv/opencv2/flann/hdf5.h +++ b/IPL/include/opencv/opencv2/flann/hdf5.h @@ -30,6 +30,8 @@ #ifndef OPENCV_FLANN_HDF5_H_ #define OPENCV_FLANN_HDF5_H_ +//! @cond IGNORED + #include #include "matrix.h" @@ -228,4 +230,6 @@ void load_from_file(cvflann::Matrix& dataset, const String& filename, const S #endif // HAVE_MPI } // namespace cvflann::mpi +//! @endcond + #endif /* OPENCV_FLANN_HDF5_H_ */ diff --git a/IPL/include/opencv/opencv2/flann/heap.h b/IPL/include/opencv/opencv2/flann/heap.h index 92a6ea6..ee1c682 100644 --- a/IPL/include/opencv/opencv2/flann/heap.h +++ b/IPL/include/opencv/opencv2/flann/heap.h @@ -31,6 +31,8 @@ #ifndef OPENCV_FLANN_HEAP_H_ #define OPENCV_FLANN_HEAP_H_ +//! @cond IGNORED + #include #include @@ -162,4 +164,6 @@ class Heap } +//! @endcond + #endif //OPENCV_FLANN_HEAP_H_ diff --git a/IPL/include/opencv/opencv2/flann/hierarchical_clustering_index.h b/IPL/include/opencv/opencv2/flann/hierarchical_clustering_index.h index 9d890d4..20304ed 100644 --- a/IPL/include/opencv/opencv2/flann/hierarchical_clustering_index.h +++ b/IPL/include/opencv/opencv2/flann/hierarchical_clustering_index.h @@ -31,6 +31,8 @@ #ifndef OPENCV_FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_ #define OPENCV_FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_ +//! @cond IGNORED + #include #include #include @@ -435,7 +437,7 @@ class HierarchicalClusteringIndex : public NNIndex /** * Returns size of index. */ - size_t size() const + size_t size() const CV_OVERRIDE { return size_; } @@ -443,7 +445,7 @@ class HierarchicalClusteringIndex : public NNIndex /** * Returns the length of an index feature. */ - size_t veclen() const + size_t veclen() const CV_OVERRIDE { return veclen_; } @@ -453,7 +455,7 @@ class HierarchicalClusteringIndex : public NNIndex * Computes the inde memory usage * Returns: memory used by the index */ - int usedMemory() const + int usedMemory() const CV_OVERRIDE { return pool.usedMemory+pool.wastedMemory+memoryCounter; } @@ -461,7 +463,7 @@ class HierarchicalClusteringIndex : public NNIndex /** * Builds the index */ - void buildIndex() + void buildIndex() CV_OVERRIDE { if (branching_<2) { throw FLANNException("Branching factor must be at least 2"); @@ -480,13 +482,13 @@ class HierarchicalClusteringIndex : public NNIndex } - flann_algorithm_t getType() const + flann_algorithm_t getType() const CV_OVERRIDE { return FLANN_INDEX_HIERARCHICAL; } - void saveIndex(FILE* stream) + void saveIndex(FILE* stream) CV_OVERRIDE { save_value(stream, branching_); save_value(stream, trees_); @@ -501,7 +503,7 @@ class HierarchicalClusteringIndex : public NNIndex } - void loadIndex(FILE* stream) + void loadIndex(FILE* stream) CV_OVERRIDE { free_elements(); @@ -544,7 +546,7 @@ class HierarchicalClusteringIndex : public NNIndex * vec = the vector for which to search the nearest neighbors * searchParams = parameters that influence the search algorithm (checks) */ - void findNeighbors(ResultSet& result, const ElementType* vec, const SearchParams& searchParams) + void findNeighbors(ResultSet& result, const ElementType* vec, const SearchParams& searchParams) CV_OVERRIDE { int maxChecks = get_param(searchParams,"checks",32); @@ -569,7 +571,7 @@ class HierarchicalClusteringIndex : public NNIndex } - IndexParams getParameters() const + IndexParams getParameters() const CV_OVERRIDE { return params; } @@ -578,7 +580,7 @@ class HierarchicalClusteringIndex : public NNIndex private: /** - * Struture representing a node in the hierarchical k-means tree. + * Structure representing a node in the hierarchical k-means tree. */ struct Node { @@ -845,4 +847,6 @@ class HierarchicalClusteringIndex : public NNIndex } +//! @endcond + #endif /* OPENCV_FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_ */ diff --git a/IPL/include/opencv/opencv2/flann/index_testing.h b/IPL/include/opencv/opencv2/flann/index_testing.h index d764004..47b6f0b 100644 --- a/IPL/include/opencv/opencv2/flann/index_testing.h +++ b/IPL/include/opencv/opencv2/flann/index_testing.h @@ -31,6 +31,8 @@ #ifndef OPENCV_FLANN_INDEX_TESTING_H_ #define OPENCV_FLANN_INDEX_TESTING_H_ +//! @cond IGNORED + #include #include #include @@ -315,4 +317,6 @@ void test_index_precisions(NNIndex& index, const Matrix #include #include @@ -120,24 +122,29 @@ class KDTreeIndex : public NNIndex /** * Builds the index */ - void buildIndex() + void buildIndex() CV_OVERRIDE { /* Construct the randomized trees. */ for (int i = 0; i < trees_; i++) { /* Randomize the order of vectors to allow for unbiased sampling. */ +#ifndef OPENCV_FLANN_USE_STD_RAND + cv::randShuffle(vind_); +#else std::random_shuffle(vind_.begin(), vind_.end()); +#endif + tree_roots_[i] = divideTree(&vind_[0], int(size_) ); } } - flann_algorithm_t getType() const + flann_algorithm_t getType() const CV_OVERRIDE { return FLANN_INDEX_KDTREE; } - void saveIndex(FILE* stream) + void saveIndex(FILE* stream) CV_OVERRIDE { save_value(stream, trees_); for (int i=0; i - void loadIndex(FILE* stream) + void loadIndex(FILE* stream) CV_OVERRIDE { load_value(stream, trees_); if (tree_roots_!=NULL) { @@ -165,7 +172,7 @@ class KDTreeIndex : public NNIndex /** * Returns size of index. */ - size_t size() const + size_t size() const CV_OVERRIDE { return size_; } @@ -173,7 +180,7 @@ class KDTreeIndex : public NNIndex /** * Returns the length of an index feature. */ - size_t veclen() const + size_t veclen() const CV_OVERRIDE { return veclen_; } @@ -182,7 +189,7 @@ class KDTreeIndex : public NNIndex * Computes the inde memory usage * Returns: memory used by the index */ - int usedMemory() const + int usedMemory() const CV_OVERRIDE { return int(pool_.usedMemory+pool_.wastedMemory+dataset_.rows*sizeof(int)); // pool memory and vind array memory } @@ -196,7 +203,7 @@ class KDTreeIndex : public NNIndex * vec = the vector for which to search the nearest neighbors * maxCheck = the maximum number of restarts (in a best-bin-first manner) */ - void findNeighbors(ResultSet& result, const ElementType* vec, const SearchParams& searchParams) + void findNeighbors(ResultSet& result, const ElementType* vec, const SearchParams& searchParams) CV_OVERRIDE { int maxChecks = get_param(searchParams,"checks", 32); float epsError = 1+get_param(searchParams,"eps",0.0f); @@ -209,7 +216,7 @@ class KDTreeIndex : public NNIndex } } - IndexParams getParameters() const + IndexParams getParameters() const CV_OVERRIDE { return index_params_; } @@ -618,4 +625,6 @@ class KDTreeIndex : public NNIndex } +//! @endcond + #endif //OPENCV_FLANN_KDTREE_INDEX_H_ diff --git a/IPL/include/opencv/opencv2/flann/kdtree_single_index.h b/IPL/include/opencv/opencv2/flann/kdtree_single_index.h index 30488ad..fa38f9f 100644 --- a/IPL/include/opencv/opencv2/flann/kdtree_single_index.h +++ b/IPL/include/opencv/opencv2/flann/kdtree_single_index.h @@ -31,6 +31,8 @@ #ifndef OPENCV_FLANN_KDTREE_SINGLE_INDEX_H_ #define OPENCV_FLANN_KDTREE_SINGLE_INDEX_H_ +//! @cond IGNORED + #include #include #include @@ -87,6 +89,7 @@ class KDTreeSingleIndex : public NNIndex { size_ = dataset_.rows; dim_ = dataset_.cols; + root_node_ = 0; int dim_param = get_param(params,"dim",-1); if (dim_param>0) dim_ = dim_param; leaf_max_size_ = get_param(params,"leaf_max_size",10); @@ -113,7 +116,7 @@ class KDTreeSingleIndex : public NNIndex /** * Builds the index */ - void buildIndex() + void buildIndex() CV_OVERRIDE { computeBoundingBox(root_bbox_); root_node_ = divideTree(0, (int)size_, root_bbox_ ); // construct the tree @@ -132,13 +135,13 @@ class KDTreeSingleIndex : public NNIndex } } - flann_algorithm_t getType() const + flann_algorithm_t getType() const CV_OVERRIDE { return FLANN_INDEX_KDTREE_SINGLE; } - void saveIndex(FILE* stream) + void saveIndex(FILE* stream) CV_OVERRIDE { save_value(stream, size_); save_value(stream, dim_); @@ -153,7 +156,7 @@ class KDTreeSingleIndex : public NNIndex } - void loadIndex(FILE* stream) + void loadIndex(FILE* stream) CV_OVERRIDE { load_value(stream, size_); load_value(stream, dim_); @@ -178,7 +181,7 @@ class KDTreeSingleIndex : public NNIndex /** * Returns size of index. */ - size_t size() const + size_t size() const CV_OVERRIDE { return size_; } @@ -186,7 +189,7 @@ class KDTreeSingleIndex : public NNIndex /** * Returns the length of an index feature. */ - size_t veclen() const + size_t veclen() const CV_OVERRIDE { return dim_; } @@ -195,7 +198,7 @@ class KDTreeSingleIndex : public NNIndex * Computes the inde memory usage * Returns: memory used by the index */ - int usedMemory() const + int usedMemory() const CV_OVERRIDE { return (int)(pool_.usedMemory+pool_.wastedMemory+dataset_.rows*sizeof(int)); // pool memory and vind array memory } @@ -209,7 +212,7 @@ class KDTreeSingleIndex : public NNIndex * \param[in] knn Number of nearest neighbors to return * \param[in] params Search parameters */ - void knnSearch(const Matrix& queries, Matrix& indices, Matrix& dists, int knn, const SearchParams& params) + void knnSearch(const Matrix& queries, Matrix& indices, Matrix& dists, int knn, const SearchParams& params) CV_OVERRIDE { assert(queries.cols == veclen()); assert(indices.rows >= queries.rows); @@ -224,7 +227,7 @@ class KDTreeSingleIndex : public NNIndex } } - IndexParams getParameters() const + IndexParams getParameters() const CV_OVERRIDE { return index_params_; } @@ -238,7 +241,7 @@ class KDTreeSingleIndex : public NNIndex * vec = the vector for which to search the nearest neighbors * maxCheck = the maximum number of restarts (in a best-bin-first manner) */ - void findNeighbors(ResultSet& result, const ElementType* vec, const SearchParams& searchParams) + void findNeighbors(ResultSet& result, const ElementType* vec, const SearchParams& searchParams) CV_OVERRIDE { float epsError = 1+get_param(searchParams,"eps",0.0f); @@ -631,4 +634,6 @@ class KDTreeSingleIndex : public NNIndex } +//! @endcond + #endif //OPENCV_FLANN_KDTREE_SINGLE_INDEX_H_ diff --git a/IPL/include/opencv/opencv2/flann/kmeans_index.h b/IPL/include/opencv/opencv2/flann/kmeans_index.h index 226fc71..7574e7f 100644 --- a/IPL/include/opencv/opencv2/flann/kmeans_index.h +++ b/IPL/include/opencv/opencv2/flann/kmeans_index.h @@ -31,6 +31,8 @@ #ifndef OPENCV_FLANN_KMEANS_INDEX_H_ #define OPENCV_FLANN_KMEANS_INDEX_H_ +//! @cond IGNORED + #include #include #include @@ -266,7 +268,7 @@ class KMeansIndex : public NNIndex public: - flann_algorithm_t getType() const + flann_algorithm_t getType() const CV_OVERRIDE { return FLANN_INDEX_KMEANS; } @@ -276,30 +278,27 @@ class KMeansIndex : public NNIndex public: KMeansDistanceComputer(Distance _distance, const Matrix& _dataset, const int _branching, const int* _indices, const Matrix& _dcenters, const size_t _veclen, - int* _count, int* _belongs_to, std::vector& _radiuses, bool& _converged, cv::Mutex& _mtx) + std::vector &_new_centroids, std::vector &_sq_dists) : distance(_distance) , dataset(_dataset) , branching(_branching) , indices(_indices) , dcenters(_dcenters) , veclen(_veclen) - , count(_count) - , belongs_to(_belongs_to) - , radiuses(_radiuses) - , converged(_converged) - , mtx(_mtx) + , new_centroids(_new_centroids) + , sq_dists(_sq_dists) { } - void operator()(const cv::Range& range) const + void operator()(const cv::Range& range) const CV_OVERRIDE { const int begin = range.start; const int end = range.end; for( int i = begin; inew_sq_dist) { @@ -307,17 +306,8 @@ class KMeansIndex : public NNIndex sq_dist = new_sq_dist; } } - if (sq_dist > radiuses[new_centroid]) { - radiuses[new_centroid] = sq_dist; - } - if (new_centroid != belongs_to[i]) { - count[belongs_to[i]]--; - count[new_centroid]++; - belongs_to[i] = new_centroid; - mtx.lock(); - converged = false; - mtx.unlock(); - } + sq_dists[i] = sq_dist; + new_centroids[i] = new_centroid; } } @@ -328,11 +318,8 @@ class KMeansIndex : public NNIndex const int* indices; const Matrix& dcenters; const size_t veclen; - int* count; - int* belongs_to; - std::vector& radiuses; - bool& converged; - cv::Mutex& mtx; + std::vector &new_centroids; + std::vector &sq_dists; KMeansDistanceComputer& operator=( const KMeansDistanceComputer & ) { return *this; } }; @@ -398,7 +385,7 @@ class KMeansIndex : public NNIndex /** * Returns size of index. */ - size_t size() const + size_t size() const CV_OVERRIDE { return size_; } @@ -406,7 +393,7 @@ class KMeansIndex : public NNIndex /** * Returns the length of an index feature. */ - size_t veclen() const + size_t veclen() const CV_OVERRIDE { return veclen_; } @@ -421,7 +408,7 @@ class KMeansIndex : public NNIndex * Computes the inde memory usage * Returns: memory used by the index */ - int usedMemory() const + int usedMemory() const CV_OVERRIDE { return pool_.usedMemory+pool_.wastedMemory+memoryCounter_; } @@ -429,7 +416,7 @@ class KMeansIndex : public NNIndex /** * Builds the index */ - void buildIndex() + void buildIndex() CV_OVERRIDE { if (branching_<2) { throw FLANNException("Branching factor must be at least 2"); @@ -448,7 +435,7 @@ class KMeansIndex : public NNIndex } - void saveIndex(FILE* stream) + void saveIndex(FILE* stream) CV_OVERRIDE { save_value(stream, branching_); save_value(stream, iterations_); @@ -460,7 +447,7 @@ class KMeansIndex : public NNIndex } - void loadIndex(FILE* stream) + void loadIndex(FILE* stream) CV_OVERRIDE { load_value(stream, branching_); load_value(stream, iterations_); @@ -495,7 +482,7 @@ class KMeansIndex : public NNIndex * vec = the vector for which to search the nearest neighbors * searchParams = parameters that influence the search algorithm (checks, cb_index) */ - void findNeighbors(ResultSet& result, const ElementType* vec, const SearchParams& searchParams) + void findNeighbors(ResultSet& result, const ElementType* vec, const SearchParams& searchParams) CV_OVERRIDE { int maxChecks = get_param(searchParams,"checks",32); @@ -554,7 +541,7 @@ class KMeansIndex : public NNIndex return clusterCount; } - IndexParams getParameters() const + IndexParams getParameters() const CV_OVERRIDE { return index_params_; } @@ -562,7 +549,7 @@ class KMeansIndex : public NNIndex private: /** - * Struture representing a node in the hierarchical k-means tree. + * Structure representing a node in the hierarchical k-means tree. */ struct KMeansNode { @@ -726,7 +713,7 @@ class KMeansIndex : public NNIndex } cv::AutoBuffer centers_idx_buf(branching); - int* centers_idx = (int*)centers_idx_buf; + int* centers_idx = centers_idx_buf.data(); int centers_length; (this->*chooseCenters)(branching, indices, indices_length, centers_idx, centers_length); @@ -739,7 +726,7 @@ class KMeansIndex : public NNIndex cv::AutoBuffer dcenters_buf(branching*veclen_); - Matrix dcenters((double*)dcenters_buf,branching,veclen_); + Matrix dcenters(dcenters_buf.data(), branching, veclen_); for (int i=0; i std::vector radiuses(branching); cv::AutoBuffer count_buf(branching); - int* count = (int*)count_buf; + int* count = count_buf.data(); for (int i=0; i // assign points to clusters cv::AutoBuffer belongs_to_buf(indices_length); - int* belongs_to = (int*)belongs_to_buf; + int* belongs_to = belongs_to_buf.data(); for (int i=0; i } } + std::vector new_centroids(indices_length); + std::vector sq_dists(indices_length); + // reassign points to clusters - cv::Mutex mtx; - KMeansDistanceComputer invoker(distance_, dataset_, branching, indices, dcenters, veclen_, count, belongs_to, radiuses, converged, mtx); + KMeansDistanceComputer invoker(distance_, dataset_, branching, indices, dcenters, veclen_, new_centroids, sq_dists); parallel_for_(cv::Range(0, (int)indices_length), invoker); + for (int i=0; i < (int)indices_length; ++i) { + DistanceType sq_dist(sq_dists[i]); + int new_centroid(new_centroids[i]); + if (sq_dist > radiuses[new_centroid]) { + radiuses[new_centroid] = sq_dist; + } + if (new_centroid != belongs_to[i]) { + count[belongs_to[i]]--; + count[new_centroid]++; + belongs_to[i] = new_centroid; + converged = false; + } + } + for (int i=0; i computeClustering(node->childs[c],indices+start, end-start, branching, level+1); start=end; } + + delete[] centers; } @@ -1051,7 +1056,7 @@ class KMeansIndex : public NNIndex /** - * Helper function the descends in the hierarchical k-means tree by spliting those clusters that minimize + * Helper function the descends in the hierarchical k-means tree by splitting those clusters that minimize * the overall variance of the clustering. * Params: * root = root node @@ -1166,4 +1171,6 @@ class KMeansIndex : public NNIndex } +//! @endcond + #endif //OPENCV_FLANN_KMEANS_INDEX_H_ diff --git a/IPL/include/opencv/opencv2/flann/linear_index.h b/IPL/include/opencv/opencv2/flann/linear_index.h index 5aa7a5c..8a0f10f 100644 --- a/IPL/include/opencv/opencv2/flann/linear_index.h +++ b/IPL/include/opencv/opencv2/flann/linear_index.h @@ -31,6 +31,8 @@ #ifndef OPENCV_FLANN_LINEAR_INDEX_H_ #define OPENCV_FLANN_LINEAR_INDEX_H_ +//! @cond IGNORED + #include "general.h" #include "nn_index.h" @@ -63,47 +65,47 @@ class LinearIndex : public NNIndex LinearIndex(const LinearIndex&); LinearIndex& operator=(const LinearIndex&); - flann_algorithm_t getType() const + flann_algorithm_t getType() const CV_OVERRIDE { return FLANN_INDEX_LINEAR; } - size_t size() const + size_t size() const CV_OVERRIDE { return dataset_.rows; } - size_t veclen() const + size_t veclen() const CV_OVERRIDE { return dataset_.cols; } - int usedMemory() const + int usedMemory() const CV_OVERRIDE { return 0; } - void buildIndex() + void buildIndex() CV_OVERRIDE { /* nothing to do here for linear search */ } - void saveIndex(FILE*) + void saveIndex(FILE*) CV_OVERRIDE { /* nothing to do here for linear search */ } - void loadIndex(FILE*) + void loadIndex(FILE*) CV_OVERRIDE { /* nothing to do here for linear search */ index_params_["algorithm"] = getType(); } - void findNeighbors(ResultSet& resultSet, const ElementType* vec, const SearchParams& /*searchParams*/) + void findNeighbors(ResultSet& resultSet, const ElementType* vec, const SearchParams& /*searchParams*/) CV_OVERRIDE { ElementType* data = dataset_.data; for (size_t i = 0; i < dataset_.rows; ++i, data += dataset_.cols) { @@ -112,7 +114,7 @@ class LinearIndex : public NNIndex } } - IndexParams getParameters() const + IndexParams getParameters() const CV_OVERRIDE { return index_params_; } @@ -129,4 +131,6 @@ class LinearIndex : public NNIndex } +//! @endcond + #endif // OPENCV_FLANN_LINEAR_INDEX_H_ diff --git a/IPL/include/opencv/opencv2/flann/logger.h b/IPL/include/opencv/opencv2/flann/logger.h index 24f3fb6..8911812 100644 --- a/IPL/include/opencv/opencv2/flann/logger.h +++ b/IPL/include/opencv/opencv2/flann/logger.h @@ -31,6 +31,8 @@ #ifndef OPENCV_FLANN_LOGGER_H #define OPENCV_FLANN_LOGGER_H +//! @cond IGNORED + #include #include @@ -63,7 +65,12 @@ class Logger stream = stdout; } else { +#ifdef _MSC_VER + if (fopen_s(&stream, name, "w") != 0) + stream = NULL; +#else stream = fopen(name,"w"); +#endif if (stream == NULL) { stream = stdout; } @@ -127,4 +134,6 @@ class Logger } +//! @endcond + #endif //OPENCV_FLANN_LOGGER_H diff --git a/IPL/include/opencv/opencv2/flann/lsh_index.h b/IPL/include/opencv/opencv2/flann/lsh_index.h index 4d4670e..ee620da 100644 --- a/IPL/include/opencv/opencv2/flann/lsh_index.h +++ b/IPL/include/opencv/opencv2/flann/lsh_index.h @@ -35,6 +35,8 @@ #ifndef OPENCV_FLANN_LSH_INDEX_H_ #define OPENCV_FLANN_LSH_INDEX_H_ +//! @cond IGNORED + #include #include #include @@ -107,7 +109,7 @@ class LshIndex : public NNIndex /** * Builds the index */ - void buildIndex() + void buildIndex() CV_OVERRIDE { tables_.resize(table_number_); for (unsigned int i = 0; i < table_number_; ++i) { @@ -119,13 +121,13 @@ class LshIndex : public NNIndex } } - flann_algorithm_t getType() const + flann_algorithm_t getType() const CV_OVERRIDE { return FLANN_INDEX_LSH; } - void saveIndex(FILE* stream) + void saveIndex(FILE* stream) CV_OVERRIDE { save_value(stream,table_number_); save_value(stream,key_size_); @@ -133,7 +135,7 @@ class LshIndex : public NNIndex save_value(stream, dataset_); } - void loadIndex(FILE* stream) + void loadIndex(FILE* stream) CV_OVERRIDE { load_value(stream, table_number_); load_value(stream, key_size_); @@ -151,7 +153,7 @@ class LshIndex : public NNIndex /** * Returns size of index. */ - size_t size() const + size_t size() const CV_OVERRIDE { return dataset_.rows; } @@ -159,7 +161,7 @@ class LshIndex : public NNIndex /** * Returns the length of an index feature. */ - size_t veclen() const + size_t veclen() const CV_OVERRIDE { return feature_size_; } @@ -168,13 +170,13 @@ class LshIndex : public NNIndex * Computes the index memory usage * Returns: memory used by the index */ - int usedMemory() const + int usedMemory() const CV_OVERRIDE { return (int)(dataset_.rows * sizeof(int)); } - IndexParams getParameters() const + IndexParams getParameters() const CV_OVERRIDE { return index_params_; } @@ -187,7 +189,7 @@ class LshIndex : public NNIndex * \param[in] knn Number of nearest neighbors to return * \param[in] params Search parameters */ - virtual void knnSearch(const Matrix& queries, Matrix& indices, Matrix& dists, int knn, const SearchParams& params) + virtual void knnSearch(const Matrix& queries, Matrix& indices, Matrix& dists, int knn, const SearchParams& params) CV_OVERRIDE { assert(queries.cols == veclen()); assert(indices.rows >= queries.rows); @@ -217,7 +219,7 @@ class LshIndex : public NNIndex * vec = the vector for which to search the nearest neighbors * maxCheck = the maximum number of restarts (in a best-bin-first manner) */ - void findNeighbors(ResultSet& result, const ElementType* vec, const SearchParams& /*searchParams*/) + void findNeighbors(ResultSet& result, const ElementType* vec, const SearchParams& /*searchParams*/) CV_OVERRIDE { getNeighbors(vec, result); } @@ -389,4 +391,6 @@ class LshIndex : public NNIndex }; } +//! @endcond + #endif //OPENCV_FLANN_LSH_INDEX_H_ diff --git a/IPL/include/opencv/opencv2/flann/lsh_table.h b/IPL/include/opencv/opencv2/flann/lsh_table.h index 582dcdb..db8b5af 100644 --- a/IPL/include/opencv/opencv2/flann/lsh_table.h +++ b/IPL/include/opencv/opencv2/flann/lsh_table.h @@ -35,6 +35,8 @@ #ifndef OPENCV_FLANN_LSH_TABLE_H_ #define OPENCV_FLANN_LSH_TABLE_H_ +//! @cond IGNORED + #include #include #include @@ -146,6 +148,9 @@ class LshTable */ LshTable() { + key_size_ = 0; + feature_size_ = 0; + speed_level_ = kArray; } /** Default constructor @@ -155,8 +160,8 @@ class LshTable */ LshTable(unsigned int feature_size, unsigned int key_size) { - (void)feature_size; - (void)key_size; + feature_size_ = feature_size; + CV_UNUSED(key_size); std::cerr << "LSH is not implemented for that type" << std::endl; assert(0); } @@ -265,7 +270,7 @@ class LshTable { const size_t key_size_lower_bound = 1; //a value (size_t(1) << key_size) must fit the size_t type so key_size has to be strictly less than size of size_t - const size_t key_size_upper_bound = std::min(sizeof(BucketKey) * CHAR_BIT + 1, sizeof(size_t) * CHAR_BIT); + const size_t key_size_upper_bound = (std::min)(sizeof(BucketKey) * CHAR_BIT + 1, sizeof(size_t) * CHAR_BIT); if (key_size < key_size_lower_bound || key_size >= key_size_upper_bound) { CV_Error(cv::Error::StsBadArg, cv::format("Invalid key_size (=%d). Valid values for your system are %d <= key_size < %d.", (int)key_size, (int)key_size_lower_bound, (int)key_size_upper_bound)); @@ -330,6 +335,8 @@ class LshTable */ unsigned int key_size_; + unsigned int feature_size_; + // Members only used for the unsigned char specialization /** The mask to apply to a feature to get the hash key * Only used in the unsigned char case @@ -343,14 +350,19 @@ class LshTable template<> inline LshTable::LshTable(unsigned int feature_size, unsigned int subsignature_size) { + feature_size_ = feature_size; initialize(subsignature_size); // Allocate the mask - mask_ = std::vector((size_t)ceil((float)(feature_size * sizeof(char)) / (float)sizeof(size_t)), 0); + mask_ = std::vector((feature_size * sizeof(char) + sizeof(size_t) - 1) / sizeof(size_t), 0); // A bit brutal but fast to code - std::vector indices(feature_size * CHAR_BIT); - for (size_t i = 0; i < feature_size * CHAR_BIT; ++i) indices[i] = i; + std::vector indices(feature_size * CHAR_BIT); + for (size_t i = 0; i < feature_size * CHAR_BIT; ++i) indices[i] = (int)i; +#ifndef OPENCV_FLANN_USE_STD_RAND + cv::randShuffle(indices); +#else std::random_shuffle(indices.begin(), indices.end()); +#endif // Generate a random set of order of subsignature_size_ bits for (unsigned int i = 0; i < key_size_; ++i) { @@ -386,6 +398,7 @@ inline size_t LshTable::getKey(const unsigned char* feature) cons { // no need to check if T is dividable by sizeof(size_t) like in the Hamming // distance computation as we have a mask + // FIXIT: This is bad assumption, because we reading tail bytes after of the allocated features buffer const size_t* feature_block_ptr = reinterpret_cast ((const void*)feature); // Figure out the subsignature of the feature @@ -394,10 +407,20 @@ inline size_t LshTable::getKey(const unsigned char* feature) cons size_t subsignature = 0; size_t bit_index = 1; - for (std::vector::const_iterator pmask_block = mask_.begin(); pmask_block != mask_.end(); ++pmask_block) { + for (unsigned i = 0; i < feature_size_; i += sizeof(size_t)) { // get the mask and signature blocks - size_t feature_block = *feature_block_ptr; - size_t mask_block = *pmask_block; + size_t feature_block; + if (i <= feature_size_ - sizeof(size_t)) + { + feature_block = *feature_block_ptr; + } + else + { + size_t tmp = 0; + memcpy(&tmp, feature_block_ptr, feature_size_ - i); // preserve bytes order + feature_block = tmp; + } + size_t mask_block = mask_[i / sizeof(size_t)]; while (mask_block) { // Get the lowest set bit in the mask block size_t lowest_bit = mask_block & (-(ptrdiff_t)mask_block); @@ -489,4 +512,6 @@ inline LshStats LshTable::getStats() const //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//! @endcond + #endif /* OPENCV_FLANN_LSH_TABLE_H_ */ diff --git a/IPL/include/opencv/opencv2/flann/matrix.h b/IPL/include/opencv/opencv2/flann/matrix.h index 51b6c63..34893b7 100644 --- a/IPL/include/opencv/opencv2/flann/matrix.h +++ b/IPL/include/opencv/opencv2/flann/matrix.h @@ -31,6 +31,8 @@ #ifndef OPENCV_FLANN_DATASET_H_ #define OPENCV_FLANN_DATASET_H_ +//! @cond IGNORED + #include #include "general.h" @@ -66,7 +68,7 @@ class Matrix /** * Convenience function for deallocating the storage data. */ - FLANN_DEPRECATED void free() + CV_DEPRECATED void free() { fprintf(stderr, "The cvflann::Matrix::free() method is deprecated " "and it does not do any memory deallocation any more. You are" @@ -113,4 +115,6 @@ class UntypedMatrix } +//! @endcond + #endif //OPENCV_FLANN_DATASET_H_ diff --git a/IPL/include/opencv/opencv2/flann/miniflann.hpp b/IPL/include/opencv/opencv2/flann/miniflann.hpp index 02fa236..2532907 100644 --- a/IPL/include/opencv/opencv2/flann/miniflann.hpp +++ b/IPL/include/opencv/opencv2/flann/miniflann.hpp @@ -40,8 +40,10 @@ // //M*/ -#ifndef _OPENCV_MINIFLANN_HPP_ -#define _OPENCV_MINIFLANN_HPP_ +#ifndef OPENCV_MINIFLANN_HPP +#define OPENCV_MINIFLANN_HPP + +//! @cond IGNORED #include "opencv2/core.hpp" #include "opencv2/flann/defines.h" @@ -52,6 +54,20 @@ namespace cv namespace flann { +enum FlannIndexType { + FLANN_INDEX_TYPE_8U = CV_8U, + FLANN_INDEX_TYPE_8S = CV_8S, + FLANN_INDEX_TYPE_16U = CV_16U, + FLANN_INDEX_TYPE_16S = CV_16S, + FLANN_INDEX_TYPE_32S = CV_32S, + FLANN_INDEX_TYPE_32F = CV_32F, + FLANN_INDEX_TYPE_64F = CV_64F, + FLANN_INDEX_TYPE_STRING, + FLANN_INDEX_TYPE_BOOL, + FLANN_INDEX_TYPE_ALGORITHM, + LAST_VALUE_FLANN_INDEX_TYPE = FLANN_INDEX_TYPE_ALGORITHM +}; + struct CV_EXPORTS IndexParams { IndexParams(); @@ -68,12 +84,17 @@ struct CV_EXPORTS IndexParams void setBool(const String& key, bool value); void setAlgorithm(int value); + // FIXIT: replace by void write(FileStorage& fs) const + read() void getAll(std::vector& names, - std::vector& types, + std::vector& types, std::vector& strValues, std::vector& numValues) const; void* params; + +private: + IndexParams(const IndexParams &); // copy disabled + IndexParams& operator=(const IndexParams &); // assign disabled }; struct CV_EXPORTS KDTreeIndexParams : public IndexParams @@ -155,4 +176,6 @@ class CV_EXPORTS_W Index } } // namespace cv::flann +//! @endcond + #endif diff --git a/IPL/include/opencv/opencv2/flann/nn_index.h b/IPL/include/opencv/opencv2/flann/nn_index.h index 381d4bc..00fe6ec 100644 --- a/IPL/include/opencv/opencv2/flann/nn_index.h +++ b/IPL/include/opencv/opencv2/flann/nn_index.h @@ -36,6 +36,8 @@ #include "result_set.h" #include "params.h" +//! @cond IGNORED + namespace cvflann { @@ -174,4 +176,6 @@ class NNIndex } +//! @endcond + #endif //OPENCV_FLANN_NNINDEX_H diff --git a/IPL/include/opencv/opencv2/flann/object_factory.h b/IPL/include/opencv/opencv2/flann/object_factory.h index 7f971c5..5cc45ad 100644 --- a/IPL/include/opencv/opencv2/flann/object_factory.h +++ b/IPL/include/opencv/opencv2/flann/object_factory.h @@ -31,6 +31,8 @@ #ifndef OPENCV_FLANN_OBJECT_FACTORY_H_ #define OPENCV_FLANN_OBJECT_FACTORY_H_ +//! @cond IGNORED + #include namespace cvflann @@ -88,4 +90,6 @@ class ObjectFactory } +//! @endcond + #endif /* OPENCV_FLANN_OBJECT_FACTORY_H_ */ diff --git a/IPL/include/opencv/opencv2/flann/params.h b/IPL/include/opencv/opencv2/flann/params.h index 95ef4cd..b8f7331 100644 --- a/IPL/include/opencv/opencv2/flann/params.h +++ b/IPL/include/opencv/opencv2/flann/params.h @@ -30,6 +30,8 @@ #ifndef OPENCV_FLANN_PARAMS_H_ #define OPENCV_FLANN_PARAMS_H_ +//! @cond IGNORED + #include "any.h" #include "general.h" #include @@ -95,5 +97,6 @@ inline void print_params(const IndexParams& params) } +//! @endcond #endif /* OPENCV_FLANN_PARAMS_H_ */ diff --git a/IPL/include/opencv/opencv2/flann/random.h b/IPL/include/opencv/opencv2/flann/random.h index a3cf5ec..3bb48b6 100644 --- a/IPL/include/opencv/opencv2/flann/random.h +++ b/IPL/include/opencv/opencv2/flann/random.h @@ -31,6 +31,8 @@ #ifndef OPENCV_FLANN_RANDOM_H #define OPENCV_FLANN_RANDOM_H +//! @cond IGNORED + #include #include #include @@ -40,13 +42,31 @@ namespace cvflann { +inline int rand() +{ +#ifndef OPENCV_FLANN_USE_STD_RAND +# if INT_MAX == RAND_MAX + int v = cv::theRNG().next() & INT_MAX; +# else + int v = cv::theRNG().uniform(0, RAND_MAX + 1); +# endif +#else + int v = std::rand(); +#endif // OPENCV_FLANN_USE_STD_RAND + return v; +} + /** * Seeds the random number generator * @param seed Random seed */ inline void seed_random(unsigned int seed) { - srand(seed); +#ifndef OPENCV_FLANN_USE_STD_RAND + cv::theRNG() = cv::RNG(seed); +#else + std::srand(seed); +#endif } /* @@ -60,7 +80,7 @@ inline void seed_random(unsigned int seed) */ inline double rand_double(double high = 1.0, double low = 0) { - return low + ((high-low) * (std::rand() / (RAND_MAX + 1.0))); + return low + ((high-low) * (rand() / (RAND_MAX + 1.0))); } /** @@ -71,7 +91,7 @@ inline double rand_double(double high = 1.0, double low = 0) */ inline int rand_int(int high = RAND_MAX, int low = 0) { - return low + (int) ( double(high-low) * (std::rand() / (RAND_MAX + 1.0))); + return low + (int) ( double(high-low) * (rand() / (RAND_MAX + 1.0))); } /** @@ -107,7 +127,11 @@ class UniqueRandom for (int i = 0; i < size_; ++i) vals_[i] = i; // shuffle the elements in the array +#ifndef OPENCV_FLANN_USE_STD_RAND + cv::randShuffle(vals_); +#else std::random_shuffle(vals_.begin(), vals_.end()); +#endif counter_ = 0; } @@ -130,4 +154,6 @@ class UniqueRandom } +//! @endcond + #endif //OPENCV_FLANN_RANDOM_H diff --git a/IPL/include/opencv/opencv2/flann/result_set.h b/IPL/include/opencv/opencv2/flann/result_set.h index 9750019..735028f 100644 --- a/IPL/include/opencv/opencv2/flann/result_set.h +++ b/IPL/include/opencv/opencv2/flann/result_set.h @@ -31,6 +31,8 @@ #ifndef OPENCV_FLANN_RESULTSET_H #define OPENCV_FLANN_RESULTSET_H +//! @cond IGNORED + #include #include #include @@ -109,13 +111,13 @@ class KNNSimpleResultSet : public ResultSet return count; } - bool full() const + bool full() const CV_OVERRIDE { return count == capacity; } - void addPoint(DistanceType dist, int index) + void addPoint(DistanceType dist, int index) CV_OVERRIDE { if (dist >= worst_distance_) return; int i; @@ -139,7 +141,7 @@ class KNNSimpleResultSet : public ResultSet worst_distance_ = dists[capacity-1]; } - DistanceType worstDist() const + DistanceType worstDist() const CV_OVERRIDE { return worst_distance_; } @@ -176,13 +178,13 @@ class KNNResultSet : public ResultSet return count; } - bool full() const + bool full() const CV_OVERRIDE { return count == capacity; } - void addPoint(DistanceType dist, int index) + void addPoint(DistanceType dist, int index) CV_OVERRIDE { if (dist >= worst_distance_) return; int i; @@ -215,7 +217,7 @@ class KNNResultSet : public ResultSet worst_distance_ = dists[capacity-1]; } - DistanceType worstDist() const + DistanceType worstDist() const CV_OVERRIDE { return worst_distance_; } @@ -301,16 +303,16 @@ class UniqueResultSet : public ResultSet unsigned int index_; }; - /** Default cosntructor */ + /** Default constructor */ UniqueResultSet() : - worst_distance_(std::numeric_limits::max()) + is_full_(false), worst_distance_(std::numeric_limits::max()) { } /** Check the status of the set * @return true if we have k NN */ - inline bool full() const + inline bool full() const CV_OVERRIDE { return is_full_; } @@ -365,7 +367,7 @@ class UniqueResultSet : public ResultSet * If we don't have enough neighbors, it returns the max possible value * @return */ - inline DistanceType worstDist() const + inline DistanceType worstDist() const CV_OVERRIDE { return worst_distance_; } @@ -402,7 +404,7 @@ class KNNUniqueResultSet : public UniqueResultSet * @param dist distance for that neighbor * @param index index of that neighbor */ - inline void addPoint(DistanceType dist, int index) + inline void addPoint(DistanceType dist, int index) CV_OVERRIDE { // Don't do anything if we are worse than the worst if (dist >= worst_distance_) return; @@ -422,7 +424,7 @@ class KNNUniqueResultSet : public UniqueResultSet /** Remove all elements in the set */ - void clear() + void clear() CV_OVERRIDE { dist_indices_.clear(); worst_distance_ = std::numeric_limits::max(); @@ -461,14 +463,14 @@ class RadiusUniqueResultSet : public UniqueResultSet * @param dist distance for that neighbor * @param index index of that neighbor */ - void addPoint(DistanceType dist, int index) + void addPoint(DistanceType dist, int index) CV_OVERRIDE { if (dist <= radius_) dist_indices_.insert(DistIndex(dist, index)); } /** Remove all elements in the set */ - inline void clear() + inline void clear() CV_OVERRIDE { dist_indices_.clear(); } @@ -477,7 +479,7 @@ class RadiusUniqueResultSet : public UniqueResultSet /** Check the status of the set * @return alwys false */ - inline bool full() const + inline bool full() const CV_OVERRIDE { return true; } @@ -486,7 +488,7 @@ class RadiusUniqueResultSet : public UniqueResultSet * If we don't have enough neighbors, it returns the max possible value * @return */ - inline DistanceType worstDist() const + inline DistanceType worstDist() const CV_OVERRIDE { return radius_; } @@ -540,4 +542,6 @@ class KNNRadiusUniqueResultSet : public KNNUniqueResultSet }; } +//! @endcond + #endif //OPENCV_FLANN_RESULTSET_H diff --git a/IPL/include/opencv/opencv2/flann/sampling.h b/IPL/include/opencv/opencv2/flann/sampling.h index 396f177..4e452b9 100644 --- a/IPL/include/opencv/opencv2/flann/sampling.h +++ b/IPL/include/opencv/opencv2/flann/sampling.h @@ -30,6 +30,8 @@ #ifndef OPENCV_FLANN_SAMPLING_H_ #define OPENCV_FLANN_SAMPLING_H_ +//! @cond IGNORED + #include "matrix.h" #include "random.h" @@ -77,5 +79,6 @@ Matrix random_sample(const Matrix& srcMatrix, size_t size) } // namespace +//! @endcond #endif /* OPENCV_FLANN_SAMPLING_H_ */ diff --git a/IPL/include/opencv/opencv2/flann/saving.h b/IPL/include/opencv/opencv2/flann/saving.h index 7e3bea5..53359b4 100644 --- a/IPL/include/opencv/opencv2/flann/saving.h +++ b/IPL/include/opencv/opencv2/flann/saving.h @@ -29,6 +29,8 @@ #ifndef OPENCV_FLANN_SAVING_H_ #define OPENCV_FLANN_SAVING_H_ +//! @cond IGNORED + #include #include @@ -184,4 +186,6 @@ void load_value(FILE* stream, std::vector& value) } +//! @endcond + #endif /* OPENCV_FLANN_SAVING_H_ */ diff --git a/IPL/include/opencv/opencv2/flann/simplex_downhill.h b/IPL/include/opencv/opencv2/flann/simplex_downhill.h index 145901a..20b7e03 100644 --- a/IPL/include/opencv/opencv2/flann/simplex_downhill.h +++ b/IPL/include/opencv/opencv2/flann/simplex_downhill.h @@ -31,6 +31,8 @@ #ifndef OPENCV_FLANN_SIMPLEX_DOWNHILL_H_ #define OPENCV_FLANN_SIMPLEX_DOWNHILL_H_ +//! @cond IGNORED + namespace cvflann { @@ -129,7 +131,7 @@ float optimizeSimplexDownhill(T* points, int n, F func, float* vals = NULL ) } if (val_r #include "opencv2/core.hpp" #include "opencv2/core/utility.hpp" @@ -91,4 +93,6 @@ class StartStopTimer } +//! @endcond + #endif // FLANN_TIMER_H diff --git a/IPL/include/opencv/opencv2/fuzzy.hpp b/IPL/include/opencv/opencv2/fuzzy.hpp index 8a532c0..d660cc3 100644 --- a/IPL/include/opencv/opencv2/fuzzy.hpp +++ b/IPL/include/opencv/opencv2/fuzzy.hpp @@ -44,21 +44,26 @@ #include "opencv2/fuzzy/types.hpp" #include "opencv2/fuzzy/fuzzy_F0_math.hpp" +#include "opencv2/fuzzy/fuzzy_F1_math.hpp" #include "opencv2/fuzzy/fuzzy_image.hpp" /** @defgroup fuzzy Image processing based on fuzzy mathematics -Namespace for all functions is **ft**. The module brings implementation of the last image processing algorithms based on fuzzy mathematics. +Namespace for all functions is `ft`. The module brings implementation of the last image processing algorithms based on fuzzy mathematics. Method are named based on the pattern `FT`_degree_dimension`_`method. @{ - @defgroup f0_math Math with F0-transfrom support + @defgroup f0_math Math with F0-transform support -Fuzzy transform (F-transform) of the 0th degree transform whole image to a vector of its components. These components are used in latter computation. +Fuzzy transform (\f$F^0\f$-transform) of the 0th degree transforms whole image to a matrix of its components. These components are used in latter computation where each of them represents average color of certain subarea. + + @defgroup f1_math Math with F1-transform support + +Fuzzy transform (\f$F^1\f$-transform) of the 1th degree transforms whole image to a matrix of its components. Each component is polynomial of the 1th degree carrying information about average color and average gradient of certain subarea. @defgroup f_image Fuzzy image processing -Image proceesing based on F-transform is fast to process and easy to understand. +Image proceesing based on fuzzy mathematics namely F-transform. @} */ diff --git a/IPL/include/opencv/opencv2/fuzzy/fuzzy_F0_math.hpp b/IPL/include/opencv/opencv2/fuzzy/fuzzy_F0_math.hpp index e0a2c48..7beeb9b 100644 --- a/IPL/include/opencv/opencv2/fuzzy/fuzzy_F0_math.hpp +++ b/IPL/include/opencv/opencv2/fuzzy/fuzzy_F0_math.hpp @@ -53,64 +53,66 @@ namespace ft //! @addtogroup f0_math //! @{ - /** @brief Computes components of the array using direct F0-transform. - @param matrix Input 1-channel array. - @param kernel Kernel used for processing. Function **createKernel** can be used. - @param components Output 32-bit array for the components. + /** @brief Computes components of the array using direct \f$F^0\f$-transform. + @param matrix Input array. + @param kernel Kernel used for processing. Function `ft::createKernel` can be used. + @param components Output 32-bit float array for the components. @param mask Mask can be used for unwanted area marking. The function computes components using predefined kernel and mask. - - @note - F-transform technique is described in paper @cite Perf:FT. - */ - CV_EXPORTS void FT02D_components(InputArray matrix, InputArray kernel, OutputArray components, InputArray mask); - - /** @brief Computes components of the array using direct F0-transform. - @param matrix Input 1-channel array. - @param kernel Kernel used for processing. Function **createKernel** can be used. - @param components Output 32-bit array for the components. - - The function computes components using predefined kernel. - - @note - F-transform technique is described in paper @cite Perf:FT. */ - CV_EXPORTS void FT02D_components(InputArray matrix, InputArray kernel, OutputArray components); + CV_EXPORTS_W void FT02D_components(InputArray matrix, InputArray kernel, OutputArray components, InputArray mask = noArray()); - /** @brief Computes inverse F0-transfrom. - @param components Input 32-bit array for the components. - @param kernel Kernel used for processing. Function **createKernel** can be used. - @param output Output 32-bit array. + /** @brief Computes inverse \f$F^0\f$-transfrom. + @param components Input 32-bit float single channel array for the components. + @param kernel Kernel used for processing. Function `ft::createKernel` can be used. + @param output Output 32-bit float array. @param width Width of the output array. @param height Height of the output array. - @note - F-transform technique is described in paper @cite Perf:FT. + Computation of inverse F-transform. */ - CV_EXPORTS void FT02D_inverseFT(InputArray components, InputArray kernel, OutputArray output, int width, int height); + CV_EXPORTS_W void FT02D_inverseFT(InputArray components, InputArray kernel, OutputArray output, int width, int height); - /** @brief Computes F0-transfrom and inverse F0-transfrom at once. - @param image Input image. - @param kernel Kernel used for processing. Function **createKernel** can be used. - @param output Output 32-bit array. + /** @brief Computes \f$F^0\f$-transfrom and inverse \f$F^0\f$-transfrom at once. + @param matrix Input matrix. + @param kernel Kernel used for processing. Function `ft::createKernel` can be used. + @param output Output 32-bit float array. @param mask Mask used for unwanted area marking. - This function computes F-transfrom and inverse F-transfotm in one step. It is fully sufficient and optimized for **Mat**. + This function computes F-transfrom and inverse F-transfotm in one step. It is fully sufficient and optimized for `cv::Mat`. */ - CV_EXPORTS void FT02D_process(const Mat &image, const Mat &kernel, Mat &output, const Mat &mask); + CV_EXPORTS_W void FT02D_process(InputArray matrix, InputArray kernel, OutputArray output, InputArray mask = noArray()); - /** @brief Computes F0-transfrom and inverse F0-transfrom at once and return state. - @param image Input image. - @param kernel Kernel used for processing. Function **createKernel** can be used. - @param imageOutput Output 32-bit array. + /** @brief Computes \f$F^0\f$-transfrom and inverse \f$F^0\f$-transfrom at once and return state. + @param matrix Input matrix. + @param kernel Kernel used for processing. Function `ft::createKernel` can be used. + @param output Output 32-bit float array. @param mask Mask used for unwanted area marking. @param maskOutput Mask after one iteration. - @param firstStop If **true** function returns -1 when first problem appears. In case of **false**, the process is completed and summation of all problems returned. + @param firstStop If **true** function returns -1 when first problem appears. In case of `false` the process is completed and summation of all problems returned. + + This function computes iteration of F-transfrom and inverse F-transfotm and handle image and mask change. The function is used in `ft::inpaint` function. + */ + CV_EXPORTS_W int FT02D_iteration(InputArray matrix, InputArray kernel, OutputArray output, InputArray mask, OutputArray maskOutput, bool firstStop); + + /** @brief Sligtly less accurate version of \f$F^0\f$-transfrom computation optimized for higher speed. The methods counts with linear basic function. + @param matrix Input 3 channels matrix. + @param radius Radius of the `ft::LINEAR` basic function. + @param output Output array. + + This function computes F-transfrom and inverse F-transfotm using linear basic function in one step. It is ~10 times faster than `ft::FT02D_process` method. + */ + CV_EXPORTS_W void FT02D_FL_process(InputArray matrix, const int radius, OutputArray output); + + /** @brief Sligtly less accurate version of \f$F^0\f$-transfrom computation optimized for higher speed. The methods counts with linear basic function. + @param matrix Input 3 channels matrix. + @param radius Radius of the `ft::LINEAR` basic function. + @param output Output array. - This function computes iteration of F-transfrom and inverse F-transfotm and handle image and mask change. The function is used in *inpaint* function. + This function computes F-transfrom and inverse F-transfotm using linear basic function in one step. It is ~9 times faster then `ft::FT02D_process` method and more accurate than `ft::FT02D_FL_process` method. */ - CV_EXPORTS int FT02D_iteration(const Mat &image, const Mat &kernel, Mat &imageOutput, const Mat &mask, Mat &maskOutput, bool firstStop = true); + CV_EXPORTS_W void FT02D_FL_process_float(InputArray matrix, const int radius, OutputArray output); //! @} } diff --git a/IPL/include/opencv/opencv2/fuzzy/fuzzy_F1_math.hpp b/IPL/include/opencv/opencv2/fuzzy/fuzzy_F1_math.hpp new file mode 100644 index 0000000..3320e67 --- /dev/null +++ b/IPL/include/opencv/opencv2/fuzzy/fuzzy_F1_math.hpp @@ -0,0 +1,124 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2015, University of Ostrava, Institute for Research and Applications of Fuzzy Modeling, +// Pavel Vlasanek, all rights reserved. Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_FUZZY_F1_MATH_H__ +#define __OPENCV_FUZZY_F1_MATH_H__ + +#include "opencv2/fuzzy/types.hpp" +#include "opencv2/core.hpp" + +namespace cv +{ + +namespace ft +{ + //! @addtogroup f1_math + //! @{ + + /** @brief Computes components of the array using direct \f$F^1\f$-transform. + @param matrix Input array. + @param kernel Kernel used for processing. Function `ft::createKernel` can be used. + @param components Output 32-bit float array for the components. + + The function computes linear components using predefined kernel. + */ + CV_EXPORTS_W void FT12D_components(InputArray matrix, InputArray kernel, OutputArray components); + + /** @brief Computes elements of \f$F^1\f$-transform components. + @param matrix Input array. + @param kernel Kernel used for processing. Function `ft::createKernel` can be used. + @param c00 Elements represent average color. + @param c10 Elements represent average vertical gradient. + @param c01 Elements represent average horizontal gradient. + @param components Output 32-bit float array for the components. + @param mask Mask can be used for unwanted area marking. + + The function computes components and its elements using predefined kernel and mask. + */ + CV_EXPORTS_W void FT12D_polynomial(InputArray matrix, InputArray kernel, OutputArray c00, OutputArray c10, OutputArray c01, OutputArray components, InputArray mask = noArray()); + + /** @brief Creates vertical matrix for \f$F^1\f$-transform computation. + @param radius Radius of the basic function. + @param matrix The vertical matrix. + @param chn Number of channels. + + The function creates helper vertical matrix for \f$F^1\f$-transfrom processing. It is used for gradient computation. + */ + CV_EXPORTS_W void FT12D_createPolynomMatrixVertical(int radius, OutputArray matrix, const int chn); + + /** @brief Creates horizontal matrix for \f$F^1\f$-transform computation. + @param radius Radius of the basic function. + @param matrix The horizontal matrix. + @param chn Number of channels. + + The function creates helper horizontal matrix for \f$F^1\f$-transfrom processing. It is used for gradient computation. + */ + CV_EXPORTS_W void FT12D_createPolynomMatrixHorizontal(int radius, OutputArray matrix, const int chn); + + /** @brief Computes \f$F^1\f$-transfrom and inverse \f$F^1\f$-transfrom at once. + @param matrix Input matrix. + @param kernel Kernel used for processing. Function `ft::createKernel` can be used. + @param output Output 32-bit float array. + @param mask Mask used for unwanted area marking. + + This function computes \f$F^1\f$-transfrom and inverse \f$F^1\f$-transfotm in one step. It is fully sufficient and optimized for `cv::Mat`. + + @note + F-transform technique of first degreee is described in paper @cite Vlas:FT. + */ + CV_EXPORTS_W void FT12D_process(InputArray matrix, InputArray kernel, OutputArray output, InputArray mask = noArray()); + + /** @brief Computes inverse \f$F^1\f$-transfrom. + @param components Input 32-bit float single channel array for the components. + @param kernel Kernel used for processing. The same kernel as for components computation must be used. + @param output Output 32-bit float array. + @param width Width of the output array. + @param height Height of the output array. + + Computation of inverse \f$F^1\f$-transform. + */ + CV_EXPORTS_W void FT12D_inverseFT(InputArray components, InputArray kernel, OutputArray output, int width, int height); + + //! @} +} +} + +#endif // __OPENCV_FUZZY_F1_MATH_H__ diff --git a/IPL/include/opencv/opencv2/fuzzy/fuzzy_image.hpp b/IPL/include/opencv/opencv2/fuzzy/fuzzy_image.hpp index 00a8efa..9c9eeff 100644 --- a/IPL/include/opencv/opencv2/fuzzy/fuzzy_image.hpp +++ b/IPL/include/opencv/opencv2/fuzzy/fuzzy_image.hpp @@ -56,23 +56,23 @@ namespace ft /** @brief Creates kernel from basic functions. @param A Basic function used in axis **x**. @param B Basic function used in axis **y**. - @param kernel Final 32-b kernel derived from **A** and **B**. + @param kernel Final 32-bit kernel derived from **A** and **B**. @param chn Number of kernel channels. The function creates kernel usable for latter fuzzy image processing. */ - CV_EXPORTS void createKernel(cv::InputArray A, cv::InputArray B, cv::OutputArray kernel, const int chn = 1); + CV_EXPORTS_AS(createKernel1) void createKernel(InputArray A, InputArray B, OutputArray kernel, const int chn); /** @brief Creates kernel from general functions. @param function Function type could be one of the following: - **LINEAR** Linear basic function. @param radius Radius of the basic function. - @param kernel Final 32-b kernel. + @param kernel Final 32-bit kernel. @param chn Number of kernel channels. The function creates kernel from predefined functions. */ - CV_EXPORTS void createKernel(int function, int radius, cv::OutputArray kernel, const int chn = 1); + CV_EXPORTS_W void createKernel(int function, int radius, OutputArray kernel, const int chn); /** @brief Image inpainting @param image Input image. @@ -80,27 +80,27 @@ namespace ft @param output Output 32-bit image. @param radius Radius of the basic function. @param function Function type could be one of the following: - - **LINEAR** Linear basic function. + - `ft::LINEAR` Linear basic function. @param algorithm Algorithm could be one of the following: - - **ONE_STEP** One step algorithm. - - **MULTI_STEP** Algorithm automaticaly increasing radius of the basic function. - - **ITERATIVE** Iterative algorithm running in more steps using partial computations. + - `ft::ONE_STEP` One step algorithm. + - `ft::MULTI_STEP` This algorithm automaticaly increases radius of the basic function. + - `ft::ITERATIVE` Iterative algorithm running in more steps using partial computations. This function provides inpainting technique based on the fuzzy mathematic. @note The algorithms are described in paper @cite Perf:rec. */ - CV_EXPORTS void inpaint(const cv::Mat &image, const cv::Mat &mask, cv::Mat &output, int radius = 2, int function = ft::LINEAR, int algorithm = ft::ONE_STEP); + CV_EXPORTS_W void inpaint(InputArray image, InputArray mask, OutputArray output, int radius, int function, int algorithm); /** @brief Image filtering @param image Input image. - @param kernel Final 32-b kernel. + @param kernel Final 32-bit kernel. @param output Output 32-bit image. Filtering of the input image by means of F-transform. */ - CV_EXPORTS void filter(const cv::Mat &image, const cv::Mat &kernel, cv::Mat &output); + CV_EXPORTS_W void filter(InputArray image, InputArray kernel, OutputArray output); //! @} } diff --git a/IPL/include/opencv/opencv2/fuzzy/types.hpp b/IPL/include/opencv/opencv2/fuzzy/types.hpp index ec831e6..22e0a01 100644 --- a/IPL/include/opencv/opencv2/fuzzy/types.hpp +++ b/IPL/include/opencv/opencv2/fuzzy/types.hpp @@ -52,15 +52,15 @@ namespace ft enum { - LINEAR = 1, - SINUS = 2 + LINEAR = 1, //!< linear (triangular) shape + SINUS = 2 //!< sinusoidal shape }; enum { - ONE_STEP = 1, - MULTI_STEP = 2, - ITERATIVE = 3 + ONE_STEP = 1, //!< processing in one step + MULTI_STEP = 2, //!< processing in multiple step + ITERATIVE = 3 //!< processing in several iterations }; //! @} diff --git a/IPL/include/opencv/opencv2/gapi.hpp b/IPL/include/opencv/opencv2/gapi.hpp new file mode 100644 index 0000000..2c99c86 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi.hpp @@ -0,0 +1,33 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_HPP +#define OPENCV_GAPI_HPP + +#include + +/** \defgroup gapi G-API framework +@{ + @defgroup gapi_main_classes G-API Main Classes + @defgroup gapi_data_objects G-API Data Types + @{ + @defgroup gapi_meta_args G-API Metadata Descriptors + @} + @defgroup gapi_std_backends G-API Standard Backends + @defgroup gapi_compile_args G-API Graph Compilation Arguments +@} + */ + +#include +#include +#include +#include +#include +#include +#include + +#endif // OPENCV_GAPI_HPP diff --git a/IPL/include/opencv/opencv2/gapi/core.hpp b/IPL/include/opencv/opencv2/gapi/core.hpp new file mode 100644 index 0000000..00400a8 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/core.hpp @@ -0,0 +1,1726 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_CORE_HPP +#define OPENCV_GAPI_CORE_HPP + +#include + +#include // std::tuple + +#include + +#include +#include +#include + +/** \defgroup gapi_core G-API Core functionality +@{ + @defgroup gapi_math Graph API: Math operations + @defgroup gapi_pixelwise Graph API: Pixelwise operations + @defgroup gapi_matrixop Graph API: Operations on matrices + @defgroup gapi_transform Graph API: Image and channel composition functions +@} + */ +namespace cv { namespace gapi { +namespace core { + using GMat2 = std::tuple; + using GMat3 = std::tuple; // FIXME: how to avoid this? + using GMat4 = std::tuple; + using GMatScalar = std::tuple; + + G_TYPED_KERNEL(GAdd, , "org.opencv.core.math.add") { + static GMatDesc outMeta(GMatDesc a, GMatDesc b, int ddepth) { + if (ddepth == -1) + { + // OpenCV: When the input arrays in add/subtract/multiply/divide + // functions have different depths, the output array depth must be + // explicitly specified! + // See artim_op() @ arithm.cpp + GAPI_Assert(a.chan == b.chan); + GAPI_Assert(a.depth == b.depth); + return a; + } + return a.withDepth(ddepth); + } + }; + + G_TYPED_KERNEL(GAddC, , "org.opencv.core.math.addC") { + static GMatDesc outMeta(GMatDesc a, GScalarDesc, int ddepth) { + return a.withDepth(ddepth); + } + }; + + G_TYPED_KERNEL(GSub, , "org.opencv.core.math.sub") { + static GMatDesc outMeta(GMatDesc a, GMatDesc b, int ddepth) { + if (ddepth == -1) + { + // This macro should select a larger data depth from a and b + // considering the number of channels in the same + // FIXME!!! Clarify if it is valid for sub() + GAPI_Assert(a.chan == b.chan); + ddepth = std::max(a.depth, b.depth); + } + return a.withDepth(ddepth); + } + }; + + G_TYPED_KERNEL(GSubC, , "org.opencv.core.math.subC") { + static GMatDesc outMeta(GMatDesc a, GScalarDesc, int ddepth) { + return a.withDepth(ddepth); + } + }; + + G_TYPED_KERNEL(GSubRC,, "org.opencv.core.math.subRC") { + static GMatDesc outMeta(GScalarDesc, GMatDesc b, int ddepth) { + return b.withDepth(ddepth); + } + }; + + G_TYPED_KERNEL(GMul, , "org.opencv.core.math.mul") { + static GMatDesc outMeta(GMatDesc a, GMatDesc, double, int ddepth) { + return a.withDepth(ddepth); + } + }; + + G_TYPED_KERNEL(GMulCOld, , "org.opencv.core.math.mulCOld") { + static GMatDesc outMeta(GMatDesc a, double, int ddepth) { + return a.withDepth(ddepth); + } + }; + + G_TYPED_KERNEL(GMulC, , "org.opencv.core.math.mulC"){ + static GMatDesc outMeta(GMatDesc a, GScalarDesc, int ddepth) { + return a.withDepth(ddepth); + } + }; + + G_TYPED_KERNEL(GMulS, , "org.opencv.core.math.muls") { + static GMatDesc outMeta(GMatDesc a, GScalarDesc) { + return a; + } + }; // FIXME: Merge with MulC + + G_TYPED_KERNEL(GDiv, , "org.opencv.core.math.div") { + static GMatDesc outMeta(GMatDesc a, GMatDesc b, double, int ddepth) { + if (ddepth == -1) + { + GAPI_Assert(a.depth == b.depth); + return b; + } + return a.withDepth(ddepth); + } + }; + + G_TYPED_KERNEL(GDivC, , "org.opencv.core.math.divC") { + static GMatDesc outMeta(GMatDesc a, GScalarDesc, double, int ddepth) { + return a.withDepth(ddepth); + } + }; + + G_TYPED_KERNEL(GDivRC, , "org.opencv.core.math.divRC") { + static GMatDesc outMeta(GScalarDesc, GMatDesc b, double, int ddepth) { + return b.withDepth(ddepth); + } + }; + + G_TYPED_KERNEL(GMean, , "org.opencv.core.math.mean") { + static GScalarDesc outMeta(GMatDesc) { + return empty_scalar_desc(); + } + }; + + G_TYPED_KERNEL_M(GPolarToCart, , "org.opencv.core.math.polarToCart") { + static std::tuple outMeta(GMatDesc, GMatDesc a, bool) { + return std::make_tuple(a, a); + } + }; + + G_TYPED_KERNEL_M(GCartToPolar, , "org.opencv.core.math.cartToPolar") { + static std::tuple outMeta(GMatDesc x, GMatDesc, bool) { + return std::make_tuple(x, x); + } + }; + + G_TYPED_KERNEL(GPhase, , "org.opencv.core.math.phase") { + static GMatDesc outMeta(const GMatDesc &inx, const GMatDesc &, bool) { + return inx; + } + }; + + G_TYPED_KERNEL(GMask, , "org.opencv.core.pixelwise.mask") { + static GMatDesc outMeta(GMatDesc in, GMatDesc) { + return in; + } + }; + + G_TYPED_KERNEL(GCmpGT, , "org.opencv.core.pixelwise.compare.cmpGT") { + static GMatDesc outMeta(GMatDesc a, GMatDesc) { + return a.withDepth(CV_8U); + } + }; + + G_TYPED_KERNEL(GCmpGE, , "org.opencv.core.pixelwise.compare.cmpGE") { + static GMatDesc outMeta(GMatDesc a, GMatDesc) { + return a.withDepth(CV_8U); + } + }; + + G_TYPED_KERNEL(GCmpLE, , "org.opencv.core.pixelwise.compare.cmpLE") { + static GMatDesc outMeta(GMatDesc a, GMatDesc) { + return a.withDepth(CV_8U); + } + }; + + G_TYPED_KERNEL(GCmpLT, , "org.opencv.core.pixelwise.compare.cmpLT") { + static GMatDesc outMeta(GMatDesc a, GMatDesc) { + return a.withDepth(CV_8U); + } + }; + + G_TYPED_KERNEL(GCmpEQ, , "org.opencv.core.pixelwise.compare.cmpEQ") { + static GMatDesc outMeta(GMatDesc a, GMatDesc) { + return a.withDepth(CV_8U); + } + }; + + G_TYPED_KERNEL(GCmpNE, , "org.opencv.core.pixelwise.compare.cmpNE") { + static GMatDesc outMeta(GMatDesc a, GMatDesc) { + return a.withDepth(CV_8U); + } + }; + + G_TYPED_KERNEL(GCmpGTScalar, , "org.opencv.core.pixelwise.compare.cmpGTScalar"){ + static GMatDesc outMeta(GMatDesc a, GScalarDesc) { + return a.withDepth(CV_8U); + } + }; + + G_TYPED_KERNEL(GCmpGEScalar, , "org.opencv.core.pixelwise.compare.cmpGEScalar"){ + static GMatDesc outMeta(GMatDesc a, GScalarDesc) { + return a.withDepth(CV_8U); + } + }; + + G_TYPED_KERNEL(GCmpLEScalar, , "org.opencv.core.pixelwise.compare.cmpLEScalar"){ + static GMatDesc outMeta(GMatDesc a, GScalarDesc) { + return a.withDepth(CV_8U); + } + }; + + G_TYPED_KERNEL(GCmpLTScalar, , "org.opencv.core.pixelwise.compare.cmpLTScalar"){ + static GMatDesc outMeta(GMatDesc a, GScalarDesc) { + return a.withDepth(CV_8U); + } + }; + + G_TYPED_KERNEL(GCmpEQScalar, , "org.opencv.core.pixelwise.compare.cmpEQScalar"){ + static GMatDesc outMeta(GMatDesc a, GScalarDesc) { + return a.withDepth(CV_8U); + } + }; + + G_TYPED_KERNEL(GCmpNEScalar, , "org.opencv.core.pixelwise.compare.cmpNEScalar"){ + static GMatDesc outMeta(GMatDesc a, GScalarDesc) { + return a.withDepth(CV_8U); + } + }; + + G_TYPED_KERNEL(GAnd, , "org.opencv.core.pixelwise.bitwise_and") { + static GMatDesc outMeta(GMatDesc a, GMatDesc) { + return a; + } + }; + + G_TYPED_KERNEL(GAndS, , "org.opencv.core.pixelwise.bitwise_andS") { + static GMatDesc outMeta(GMatDesc a, GScalarDesc) { + return a; + } + }; + + G_TYPED_KERNEL(GOr, , "org.opencv.core.pixelwise.bitwise_or") { + static GMatDesc outMeta(GMatDesc a, GMatDesc) { + return a; + } + }; + + G_TYPED_KERNEL(GOrS, , "org.opencv.core.pixelwise.bitwise_orS") { + static GMatDesc outMeta(GMatDesc a, GScalarDesc) { + return a; + } + }; + + G_TYPED_KERNEL(GXor, , "org.opencv.core.pixelwise.bitwise_xor") { + static GMatDesc outMeta(GMatDesc a, GMatDesc) { + return a; + } + }; + + G_TYPED_KERNEL(GXorS, , "org.opencv.core.pixelwise.bitwise_xorS") { + static GMatDesc outMeta(GMatDesc a, GScalarDesc) { + return a; + } + }; + + G_TYPED_KERNEL(GNot, , "org.opencv.core.pixelwise.bitwise_not") { + static GMatDesc outMeta(GMatDesc a) { + return a; + } + }; + + G_TYPED_KERNEL(GSelect, , "org.opencv.core.pixelwise.select") { + static GMatDesc outMeta(GMatDesc a, GMatDesc, GMatDesc) { + return a; + } + }; + + G_TYPED_KERNEL(GMin, , "org.opencv.core.matrixop.min") { + static GMatDesc outMeta(GMatDesc a, GMatDesc) { + return a; + } + }; + + G_TYPED_KERNEL(GMax, , "org.opencv.core.matrixop.max") { + static GMatDesc outMeta(GMatDesc a, GMatDesc) { + return a; + } + }; + + G_TYPED_KERNEL(GAbsDiff, , "org.opencv.core.matrixop.absdiff") { + static GMatDesc outMeta(GMatDesc a, GMatDesc) { + return a; + } + }; + + G_TYPED_KERNEL(GAbsDiffC, , "org.opencv.core.matrixop.absdiffC") { + static GMatDesc outMeta(GMatDesc a, GScalarDesc) { + return a; + } + }; + + G_TYPED_KERNEL(GSum, , "org.opencv.core.matrixop.sum") { + static GScalarDesc outMeta(GMatDesc) { + return empty_scalar_desc(); + } + }; + + G_TYPED_KERNEL(GAddW, , "org.opencv.core.matrixop.addweighted") { + static GMatDesc outMeta(GMatDesc a, double, GMatDesc b, double, double, int ddepth) { + if (ddepth == -1) + { + // OpenCV: When the input arrays in add/subtract/multiply/divide + // functions have different depths, the output array depth must be + // explicitly specified! + // See artim_op() @ arithm.cpp + GAPI_Assert(a.chan == b.chan); + GAPI_Assert(a.depth == b.depth); + return a; + } + return a.withDepth(ddepth); + } + }; + + G_TYPED_KERNEL(GNormL1, , "org.opencv.core.matrixop.norml1") { + static GScalarDesc outMeta(GMatDesc) { + return empty_scalar_desc(); + } + }; + + G_TYPED_KERNEL(GNormL2, , "org.opencv.core.matrixop.norml2") { + static GScalarDesc outMeta(GMatDesc) { + return empty_scalar_desc(); + } + }; + + G_TYPED_KERNEL(GNormInf, , "org.opencv.core.matrixop.norminf") { + static GScalarDesc outMeta(GMatDesc) { + return empty_scalar_desc(); + } + }; + + G_TYPED_KERNEL_M(GIntegral, , "org.opencv.core.matrixop.integral") { + static std::tuple outMeta(GMatDesc in, int sd, int sqd) { + return std::make_tuple(in.withSizeDelta(1,1).withDepth(sd), + in.withSizeDelta(1,1).withDepth(sqd)); + } + }; + + G_TYPED_KERNEL(GThreshold, , "org.opencv.core.matrixop.threshold") { + static GMatDesc outMeta(GMatDesc in, GScalarDesc, GScalarDesc, int) { + return in; + } + }; + + + G_TYPED_KERNEL_M(GThresholdOT, , "org.opencv.core.matrixop.thresholdOT") { + static std::tuple outMeta(GMatDesc in, GScalarDesc, int) { + return std::make_tuple(in, empty_scalar_desc()); + } + }; + + G_TYPED_KERNEL(GInRange, , "org.opencv.core.matrixop.inrange") { + static GMatDesc outMeta(GMatDesc in, GScalarDesc, GScalarDesc) { + return in.withType(CV_8U, 1); + } + }; + + G_TYPED_KERNEL_M(GSplit3, , "org.opencv.core.transform.split3") { + static std::tuple outMeta(GMatDesc in) { + const auto out_depth = in.depth; + const auto out_desc = in.withType(out_depth, 1); + return std::make_tuple(out_desc, out_desc, out_desc); + } + }; + + G_TYPED_KERNEL_M(GSplit4, ,"org.opencv.core.transform.split4") { + static std::tuple outMeta(GMatDesc in) { + const auto out_depth = in.depth; + const auto out_desc = in.withType(out_depth, 1); + return std::make_tuple(out_desc, out_desc, out_desc, out_desc); + } + }; + + G_TYPED_KERNEL(GResize, , "org.opencv.core.transform.resize") { + static GMatDesc outMeta(GMatDesc in, Size sz, double fx, double fy, int) { + if (sz.width != 0 && sz.height != 0) + { + return in.withSize(sz); + } + else + { + GAPI_Assert(fx != 0. && fy != 0.); + return in.withSize + (Size(static_cast(round(in.size.width * fx)), + static_cast(round(in.size.height * fy)))); + } + } + }; + + G_TYPED_KERNEL(GResizeP, , "org.opencv.core.transform.resizeP") { + static GMatDesc outMeta(GMatDesc in, Size sz, int interp) { + GAPI_Assert(in.depth == CV_8U); + GAPI_Assert(in.chan == 3); + GAPI_Assert(in.planar); + GAPI_Assert(interp == cv::INTER_LINEAR); + return in.withSize(sz); + } + }; + + G_TYPED_KERNEL(GMerge3, , "org.opencv.core.transform.merge3") { + static GMatDesc outMeta(GMatDesc in, GMatDesc, GMatDesc) { + // Preserve depth and add channel component + return in.withType(in.depth, 3); + } + }; + + G_TYPED_KERNEL(GMerge4, , "org.opencv.core.transform.merge4") { + static GMatDesc outMeta(GMatDesc in, GMatDesc, GMatDesc, GMatDesc) { + // Preserve depth and add channel component + return in.withType(in.depth, 4); + } + }; + + G_TYPED_KERNEL(GRemap, , "org.opencv.core.transform.remap") { + static GMatDesc outMeta(GMatDesc in, Mat m1, Mat, int, int, Scalar) { + return in.withSize(m1.size()); + } + }; + + G_TYPED_KERNEL(GFlip, , "org.opencv.core.transform.flip") { + static GMatDesc outMeta(GMatDesc in, int) { + return in; + } + }; + + // TODO: eliminate the need in this kernel (streaming) + G_TYPED_KERNEL(GCrop, , "org.opencv.core.transform.crop") { + static GMatDesc outMeta(GMatDesc in, Rect rc) { + return in.withSize(Size(rc.width, rc.height)); + } + }; + + G_TYPED_KERNEL(GCopy, , "org.opencv.core.transform.copy") { + static GMatDesc outMeta(GMatDesc in) { + return in; + } + }; + + G_TYPED_KERNEL(GConcatHor, , "org.opencv.imgproc.transform.concatHor") { + static GMatDesc outMeta(GMatDesc l, GMatDesc r) { + return l.withSizeDelta(+r.size.width, 0); + } + }; + + G_TYPED_KERNEL(GConcatVert, , "org.opencv.imgproc.transform.concatVert") { + static GMatDesc outMeta(GMatDesc t, GMatDesc b) { + return t.withSizeDelta(0, +b.size.height); + } + }; + + G_TYPED_KERNEL(GLUT, , "org.opencv.core.transform.LUT") { + static GMatDesc outMeta(GMatDesc in, Mat) { + return in; + } + }; + + G_TYPED_KERNEL(GConvertTo, , "org.opencv.core.transform.convertTo") { + static GMatDesc outMeta(GMatDesc in, int rdepth, double, double) { + return rdepth < 0 ? in : in.withDepth(rdepth); + } + }; + + G_TYPED_KERNEL(GSqrt, , "org.opencv.core.math.sqrt") { + static GMatDesc outMeta(GMatDesc in) { + return in; + } + }; + + G_TYPED_KERNEL(GNormalize, , "org.opencv.core.normalize") { + static GMatDesc outMeta(GMatDesc in, double, double, int, int ddepth) { + // unlike opencv doesn't have a mask as a parameter + return (ddepth < 0 ? in : in.withDepth(ddepth)); + } + }; + + G_TYPED_KERNEL(GWarpPerspective, , "org.opencv.core.warpPerspective") { + static GMatDesc outMeta(GMatDesc in, const Mat&, Size dsize, int, int borderMode, const cv::Scalar&) { + GAPI_Assert((borderMode == cv::BORDER_CONSTANT || borderMode == cv::BORDER_REPLICATE) && + "cv::gapi::warpPerspective supports only cv::BORDER_CONSTANT and cv::BORDER_REPLICATE border modes"); + return in.withType(in.depth, in.chan).withSize(dsize); + } + }; + + G_TYPED_KERNEL(GWarpAffine, , "org.opencv.core.warpAffine") { + static GMatDesc outMeta(GMatDesc in, const Mat&, Size dsize, int, int border_mode, const cv::Scalar&) { + GAPI_Assert(border_mode != cv::BORDER_TRANSPARENT && + "cv::BORDER_TRANSPARENT mode is not supported in cv::gapi::warpAffine"); + return in.withType(in.depth, in.chan).withSize(dsize); + } + }; +} + +//! @addtogroup gapi_math +//! @{ + +/** @brief Calculates the per-element sum of two matrices. + +The function add calculates sum of two matrices of the same size and the same number of channels: +\f[\texttt{dst}(I) = \texttt{saturate} ( \texttt{src1}(I) + \texttt{src2}(I)) \quad \texttt{if mask}(I) \ne0\f] + +The function can be replaced with matrix expressions: + \f[\texttt{dst} = \texttt{src1} + \texttt{src2}\f] + +The input matrices and the output matrix can all have the same or different depths. For example, you +can add a 16-bit unsigned matrix to a 8-bit signed matrix and store the sum as a 32-bit +floating-point matrix. Depth of the output matrix is determined by the ddepth parameter. +If src1.depth() == src2.depth(), ddepth can be set to the default -1. In this case, the output matrix will have +the same depth as the input matrices. + +Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.math.add" +@param src1 first input matrix. +@param src2 second input matrix. +@param ddepth optional depth of the output matrix. +@sa sub, addWeighted +*/ +GAPI_EXPORTS GMat add(const GMat& src1, const GMat& src2, int ddepth = -1); + +/** @brief Calculates the per-element sum of matrix and given scalar. + +The function addC adds a given scalar value to each element of given matrix. +The function can be replaced with matrix expressions: + + \f[\texttt{dst} = \texttt{src1} + \texttt{c}\f] + +Depth of the output matrix is determined by the ddepth parameter. +If ddepth is set to default -1, the depth of output matrix will be the same as the depth of input matrix. +The matrices can be single or multi channel. Output matrix must have the same size and number of channels as the input matrix. + +Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.math.addC" +@param src1 first input matrix. +@param c scalar value to be added. +@param ddepth optional depth of the output matrix. +@sa sub, addWeighted +*/ +GAPI_EXPORTS GMat addC(const GMat& src1, const GScalar& c, int ddepth = -1); +//! @overload +GAPI_EXPORTS GMat addC(const GScalar& c, const GMat& src1, int ddepth = -1); + +/** @brief Calculates the per-element difference between two matrices. + +The function sub calculates difference between two matrices, when both matrices have the same size and the same number of +channels: + \f[\texttt{dst}(I) = \texttt{src1}(I) - \texttt{src2}(I)\f] + +The function can be replaced with matrix expressions: +\f[\texttt{dst} = \texttt{src1} - \texttt{src2}\f] + +The input matrices and the output matrix can all have the same or different depths. For example, you +can subtract two 8-bit unsigned matrices store the result as a 16-bit signed matrix. +Depth of the output matrix is determined by the ddepth parameter. +If src1.depth() == src2.depth(), ddepth can be set to the default -1. In this case, the output matrix will have +the same depth as the input matrices. The matrices can be single or multi channel. + +Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.math.sub" +@param src1 first input matrix. +@param src2 second input matrix. +@param ddepth optional depth of the output matrix. +@sa add, addC + */ +GAPI_EXPORTS GMat sub(const GMat& src1, const GMat& src2, int ddepth = -1); + +/** @brief Calculates the per-element difference between matrix and given scalar. + +The function can be replaced with matrix expressions: + \f[\texttt{dst} = \texttt{src} - \texttt{c}\f] + +Depth of the output matrix is determined by the ddepth parameter. +If ddepth is set to default -1, the depth of output matrix will be the same as the depth of input matrix. +The matrices can be single or multi channel. Output matrix must have the same size as src. + +Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.math.subC" +@param src first input matrix. +@param c scalar value to subtracted. +@param ddepth optional depth of the output matrix. +@sa add, addC, subRC + */ +GAPI_EXPORTS GMat subC(const GMat& src, const GScalar& c, int ddepth = -1); + +/** @brief Calculates the per-element difference between given scalar and the matrix. + +The function can be replaced with matrix expressions: + \f[\texttt{dst} = \texttt{val} - \texttt{src}\f] + +Depth of the output matrix is determined by the ddepth parameter. +If ddepth is set to default -1, the depth of output matrix will be the same as the depth of input matrix. +The matrices can be single or multi channel. Output matrix must have the same size as src. + +Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.math.subRC" +@param c scalar value to subtract from. +@param src input matrix to be subtracted. +@param ddepth optional depth of the output matrix. +@sa add, addC, subC + */ +GAPI_EXPORTS GMat subRC(const GScalar& c, const GMat& src, int ddepth = -1); + +/** @brief Calculates the per-element scaled product of two matrices. + +The function mul calculates the per-element product of two matrices: + +\f[\texttt{dst} (I)= \texttt{saturate} ( \texttt{scale} \cdot \texttt{src1} (I) \cdot \texttt{src2} (I))\f] + +If src1.depth() == src2.depth(), ddepth can be set to the default -1. In this case, the output matrix will have +the same depth as the input matrices. The matrices can be single or multi channel. +Output matrix must have the same size as input matrices. + +Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.math.mul" +@param src1 first input matrix. +@param src2 second input matrix of the same size and the same depth as src1. +@param scale optional scale factor. +@param ddepth optional depth of the output matrix. +@sa add, sub, div, addWeighted +*/ +GAPI_EXPORTS GMat mul(const GMat& src1, const GMat& src2, double scale = 1.0, int ddepth = -1); + +/** @brief Multiplies matrix by scalar. + +The function mulC multiplies each element of matrix src by given scalar value: + +\f[\texttt{dst} (I)= \texttt{saturate} ( \texttt{src1} (I) \cdot \texttt{multiplier} )\f] + +The matrices can be single or multi channel. Output matrix must have the same size as src. + +Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.math.mulC" +@param src input matrix. +@param multiplier factor to be multiplied. +@param ddepth optional depth of the output matrix. If -1, the depth of output matrix will be the same as input matrix depth. +@sa add, sub, div, addWeighted +*/ +GAPI_EXPORTS GMat mulC(const GMat& src, double multiplier, int ddepth = -1); +//! @overload +GAPI_EXPORTS GMat mulC(const GMat& src, const GScalar& multiplier, int ddepth = -1); // FIXME: merge with mulc +//! @overload +GAPI_EXPORTS GMat mulC(const GScalar& multiplier, const GMat& src, int ddepth = -1); // FIXME: merge with mulc + +/** @brief Performs per-element division of two matrices. + +The function divides one matrix by another: +\f[\texttt{dst(I) = saturate(src1(I)*scale/src2(I))}\f] + +When src2(I) is zero, dst(I) will also be zero. Different channels of +multi-channel matrices are processed independently. +The matrices can be single or multi channel. Output matrix must have the same size and depth as src. + +Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.math.div" +@param src1 first input matrix. +@param src2 second input matrix of the same size and depth as src1. +@param scale scalar factor. +@param ddepth optional depth of the output matrix; you can only pass -1 when src1.depth() == src2.depth(). +@sa mul, add, sub +*/ +GAPI_EXPORTS GMat div(const GMat& src1, const GMat& src2, double scale, int ddepth = -1); + +/** @brief Divides matrix by scalar. + +The function divC divides each element of matrix src by given scalar value: + +\f[\texttt{dst(I) = saturate(src(I)*scale/divisor)}\f] + +When divisor is zero, dst(I) will also be zero. Different channels of +multi-channel matrices are processed independently. +The matrices can be single or multi channel. Output matrix must have the same size and depth as src. + +Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.math.divC" +@param src input matrix. +@param divisor number to be divided by. +@param ddepth optional depth of the output matrix. If -1, the depth of output matrix will be the same as input matrix depth. +@param scale scale factor. +@sa add, sub, div, addWeighted +*/ +GAPI_EXPORTS GMat divC(const GMat& src, const GScalar& divisor, double scale, int ddepth = -1); + +/** @brief Divides scalar by matrix. + +The function divRC divides given scalar by each element of matrix src and keep the division result in new matrix of the same size and type as src: + +\f[\texttt{dst(I) = saturate(divident*scale/src(I))}\f] + +When src(I) is zero, dst(I) will also be zero. Different channels of +multi-channel matrices are processed independently. +The matrices can be single or multi channel. Output matrix must have the same size and depth as src. + +Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.math.divRC" +@param src input matrix. +@param divident number to be divided. +@param ddepth optional depth of the output matrix. If -1, the depth of output matrix will be the same as input matrix depth. +@param scale scale factor +@sa add, sub, div, addWeighted +*/ +GAPI_EXPORTS GMat divRC(const GScalar& divident, const GMat& src, double scale, int ddepth = -1); + +/** @brief Applies a mask to a matrix. + +The function mask set value from given matrix if the corresponding pixel value in mask matrix set to true, +and set the matrix value to 0 otherwise. + +Supported src matrix data types are @ref CV_8UC1, @ref CV_16SC1, @ref CV_16UC1. Supported mask data type is @ref CV_8UC1. + +@note Function textual ID is "org.opencv.core.math.mask" +@param src input matrix. +@param mask input mask matrix. +*/ +GAPI_EXPORTS GMat mask(const GMat& src, const GMat& mask); + +/** @brief Calculates an average (mean) of matrix elements. + +The function mean calculates the mean value M of matrix elements, +independently for each channel, and return it. + +Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.math.mean" +@param src input matrix. +*/ +GAPI_EXPORTS GScalar mean(const GMat& src); + +/** @brief Calculates x and y coordinates of 2D vectors from their magnitude and angle. + +The function polarToCart calculates the Cartesian coordinates of each 2D +vector represented by the corresponding elements of magnitude and angle: +\f[\begin{array}{l} \texttt{x} (I) = \texttt{magnitude} (I) \cos ( \texttt{angle} (I)) \\ \texttt{y} (I) = \texttt{magnitude} (I) \sin ( \texttt{angle} (I)) \\ \end{array}\f] + +The relative accuracy of the estimated coordinates is about 1e-6. + +First output is a matrix of x-coordinates of 2D vectors. +Second output is a matrix of y-coordinates of 2D vectors. +Both output must have the same size and depth as input matrices. + +@note Function textual ID is "org.opencv.core.math.polarToCart" + +@param magnitude input floating-point @ref CV_32FC1 matrix (1xN) of magnitudes of 2D vectors; +@param angle input floating-point @ref CV_32FC1 matrix (1xN) of angles of 2D vectors. +@param angleInDegrees when true, the input angles are measured in +degrees, otherwise, they are measured in radians. +@sa cartToPolar, exp, log, pow, sqrt +*/ +GAPI_EXPORTS std::tuple polarToCart(const GMat& magnitude, const GMat& angle, + bool angleInDegrees = false); + +/** @brief Calculates the magnitude and angle of 2D vectors. + +The function cartToPolar calculates either the magnitude, angle, or both +for every 2D vector (x(I),y(I)): +\f[\begin{array}{l} \texttt{magnitude} (I)= \sqrt{\texttt{x}(I)^2+\texttt{y}(I)^2} , \\ \texttt{angle} (I)= \texttt{atan2} ( \texttt{y} (I), \texttt{x} (I))[ \cdot180 / \pi ] \end{array}\f] + +The angles are calculated with accuracy about 0.3 degrees. For the point +(0,0), the angle is set to 0. + +First output is a matrix of magnitudes of the same size and depth as input x. +Second output is a matrix of angles that has the same size and depth as +x; the angles are measured in radians (from 0 to 2\*Pi) or in degrees (0 to 360 degrees). + +@note Function textual ID is "org.opencv.core.math.cartToPolar" + +@param x matrix of @ref CV_32FC1 x-coordinates. +@param y array of @ref CV_32FC1 y-coordinates. +@param angleInDegrees a flag, indicating whether the angles are measured +in radians (which is by default), or in degrees. +@sa polarToCart +*/ +GAPI_EXPORTS std::tuple cartToPolar(const GMat& x, const GMat& y, + bool angleInDegrees = false); + +/** @brief Calculates the rotation angle of 2D vectors. + +The function cv::phase calculates the rotation angle of each 2D vector that +is formed from the corresponding elements of x and y : +\f[\texttt{angle} (I) = \texttt{atan2} ( \texttt{y} (I), \texttt{x} (I))\f] + +The angle estimation accuracy is about 0.3 degrees. When x(I)=y(I)=0 , +the corresponding angle(I) is set to 0. +@param x input floating-point array of x-coordinates of 2D vectors. +@param y input array of y-coordinates of 2D vectors; it must have the +same size and the same type as x. +@param angleInDegrees when true, the function calculates the angle in +degrees, otherwise, they are measured in radians. +@return array of vector angles; it has the same size and same type as x. +*/ +GAPI_EXPORTS GMat phase(const GMat& x, const GMat &y, bool angleInDegrees = false); + +/** @brief Calculates a square root of array elements. + +The function cv::gapi::sqrt calculates a square root of each input array element. +In case of multi-channel arrays, each channel is processed +independently. The accuracy is approximately the same as of the built-in +std::sqrt . +@param src input floating-point array. +@return output array of the same size and type as src. +*/ +GAPI_EXPORTS GMat sqrt(const GMat &src); + +//! @} gapi_math +//! +//! @addtogroup gapi_pixelwise +//! @{ + +/** @brief Performs the per-element comparison of two matrices checking if elements from first matrix are greater compare to elements in second. + +The function compares elements of two matrices src1 and src2 of the same size: + \f[\texttt{dst} (I) = \texttt{src1} (I) > \texttt{src2} (I)\f] + +When the comparison result is true, the corresponding element of output +array is set to 255. The comparison operations can be replaced with the +equivalent matrix expressions: +\f[\texttt{dst} = \texttt{src1} > \texttt{src2}\f] + +Output matrix of depth @ref CV_8U must have the same size and the same number of channels as + the input matrices/matrix. + +Supported input matrix data types are @ref CV_8UC1, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.pixelwise.compare.cmpGT" +@param src1 first input matrix. +@param src2 second input matrix/scalar of the same depth as first input matrix. +@sa min, max, threshold, cmpLE, cmpGE, cmpLS +*/ +GAPI_EXPORTS GMat cmpGT(const GMat& src1, const GMat& src2); +/** @overload +@note Function textual ID is "org.opencv.core.pixelwise.compare.cmpGTScalar" +*/ +GAPI_EXPORTS GMat cmpGT(const GMat& src1, const GScalar& src2); + +/** @brief Performs the per-element comparison of two matrices checking if elements from first matrix are less than elements in second. + +The function compares elements of two matrices src1 and src2 of the same size: + \f[\texttt{dst} (I) = \texttt{src1} (I) < \texttt{src2} (I)\f] + +When the comparison result is true, the corresponding element of output +array is set to 255. The comparison operations can be replaced with the +equivalent matrix expressions: + \f[\texttt{dst} = \texttt{src1} < \texttt{src2}\f] + +Output matrix of depth @ref CV_8U must have the same size and the same number of channels as + the input matrices/matrix. + +Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.pixelwise.compare.cmpLT" +@param src1 first input matrix. +@param src2 second input matrix/scalar of the same depth as first input matrix. +@sa min, max, threshold, cmpLE, cmpGE, cmpGT +*/ +GAPI_EXPORTS GMat cmpLT(const GMat& src1, const GMat& src2); +/** @overload +@note Function textual ID is "org.opencv.core.pixelwise.compare.cmpLTScalar" +*/ +GAPI_EXPORTS GMat cmpLT(const GMat& src1, const GScalar& src2); + +/** @brief Performs the per-element comparison of two matrices checking if elements from first matrix are greater or equal compare to elements in second. + +The function compares elements of two matrices src1 and src2 of the same size: + \f[\texttt{dst} (I) = \texttt{src1} (I) >= \texttt{src2} (I)\f] + +When the comparison result is true, the corresponding element of output +array is set to 255. The comparison operations can be replaced with the +equivalent matrix expressions: + \f[\texttt{dst} = \texttt{src1} >= \texttt{src2}\f] + +Output matrix of depth @ref CV_8U must have the same size and the same number of channels as + the input matrices. + +Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.pixelwise.compare.cmpGE" +@param src1 first input matrix. +@param src2 second input matrix/scalar of the same depth as first input matrix. +@sa min, max, threshold, cmpLE, cmpGT, cmpLS +*/ +GAPI_EXPORTS GMat cmpGE(const GMat& src1, const GMat& src2); +/** @overload +@note Function textual ID is "org.opencv.core.pixelwise.compare.cmpLGEcalar" +*/ +GAPI_EXPORTS GMat cmpGE(const GMat& src1, const GScalar& src2); + +/** @brief Performs the per-element comparison of two matrices checking if elements from first matrix are less or equal compare to elements in second. + +The function compares elements of two matrices src1 and src2 of the same size: + \f[\texttt{dst} (I) = \texttt{src1} (I) <= \texttt{src2} (I)\f] + +When the comparison result is true, the corresponding element of output +array is set to 255. The comparison operations can be replaced with the +equivalent matrix expressions: + \f[\texttt{dst} = \texttt{src1} <= \texttt{src2}\f] + +Output matrix of depth @ref CV_8U must have the same size and the same number of channels as + the input matrices. + +Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.pixelwise.compare.cmpLE" +@param src1 first input matrix. +@param src2 second input matrix/scalar of the same depth as first input matrix. +@sa min, max, threshold, cmpGT, cmpGE, cmpLS +*/ +GAPI_EXPORTS GMat cmpLE(const GMat& src1, const GMat& src2); +/** @overload +@note Function textual ID is "org.opencv.core.pixelwise.compare.cmpLEScalar" +*/ +GAPI_EXPORTS GMat cmpLE(const GMat& src1, const GScalar& src2); + +/** @brief Performs the per-element comparison of two matrices checking if elements from first matrix are equal to elements in second. + +The function compares elements of two matrices src1 and src2 of the same size: + \f[\texttt{dst} (I) = \texttt{src1} (I) == \texttt{src2} (I)\f] + +When the comparison result is true, the corresponding element of output +array is set to 255. The comparison operations can be replaced with the +equivalent matrix expressions: + \f[\texttt{dst} = \texttt{src1} == \texttt{src2}\f] + +Output matrix of depth @ref CV_8U must have the same size and the same number of channels as + the input matrices. + +Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.pixelwise.compare.cmpEQ" +@param src1 first input matrix. +@param src2 second input matrix/scalar of the same depth as first input matrix. +@sa min, max, threshold, cmpNE +*/ +GAPI_EXPORTS GMat cmpEQ(const GMat& src1, const GMat& src2); +/** @overload +@note Function textual ID is "org.opencv.core.pixelwise.compare.cmpEQScalar" +*/ +GAPI_EXPORTS GMat cmpEQ(const GMat& src1, const GScalar& src2); + +/** @brief Performs the per-element comparison of two matrices checking if elements from first matrix are not equal to elements in second. + +The function compares elements of two matrices src1 and src2 of the same size: + \f[\texttt{dst} (I) = \texttt{src1} (I) != \texttt{src2} (I)\f] + +When the comparison result is true, the corresponding element of output +array is set to 255. The comparison operations can be replaced with the +equivalent matrix expressions: + \f[\texttt{dst} = \texttt{src1} != \texttt{src2}\f] + +Output matrix of depth @ref CV_8U must have the same size and the same number of channels as + the input matrices. + +Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.pixelwise.compare.cmpNE" +@param src1 first input matrix. +@param src2 second input matrix/scalar of the same depth as first input matrix. +@sa min, max, threshold, cmpEQ +*/ +GAPI_EXPORTS GMat cmpNE(const GMat& src1, const GMat& src2); +/** @overload +@note Function textual ID is "org.opencv.core.pixelwise.compare.cmpNEScalar" +*/ +GAPI_EXPORTS GMat cmpNE(const GMat& src1, const GScalar& src2); + +/** @brief computes bitwise conjunction of the two matrixes (src1 & src2) +Calculates the per-element bit-wise logical conjunction of two matrices of the same size. + +In case of floating-point matrices, their machine-specific bit +representations (usually IEEE754-compliant) are used for the operation. +In case of multi-channel matrices, each channel is processed +independently. Output matrix must have the same size and depth as the input +matrices. + +Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.pixelwise.bitwise_and" + +@param src1 first input matrix. +@param src2 second input matrix. +*/ +GAPI_EXPORTS GMat bitwise_and(const GMat& src1, const GMat& src2); +/** @overload +@note Function textual ID is "org.opencv.core.pixelwise.compare.bitwise_andS" +@param src1 first input matrix. +@param src2 scalar, which will be per-lemenetly conjuncted with elements of src1. +*/ +GAPI_EXPORTS GMat bitwise_and(const GMat& src1, const GScalar& src2); + +/** @brief computes bitwise disjunction of the two matrixes (src1 | src2) +Calculates the per-element bit-wise logical disjunction of two matrices of the same size. + +In case of floating-point matrices, their machine-specific bit +representations (usually IEEE754-compliant) are used for the operation. +In case of multi-channel matrices, each channel is processed +independently. Output matrix must have the same size and depth as the input +matrices. + +Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.pixelwise.bitwise_or" + +@param src1 first input matrix. +@param src2 second input matrix. +*/ +GAPI_EXPORTS GMat bitwise_or(const GMat& src1, const GMat& src2); +/** @overload +@note Function textual ID is "org.opencv.core.pixelwise.compare.bitwise_orS" +@param src1 first input matrix. +@param src2 scalar, which will be per-lemenetly disjuncted with elements of src1. +*/ +GAPI_EXPORTS GMat bitwise_or(const GMat& src1, const GScalar& src2); + + +/** @brief computes bitwise logical "exclusive or" of the two matrixes (src1 ^ src2) +Calculates the per-element bit-wise logical "exclusive or" of two matrices of the same size. + +In case of floating-point matrices, their machine-specific bit +representations (usually IEEE754-compliant) are used for the operation. +In case of multi-channel matrices, each channel is processed +independently. Output matrix must have the same size and depth as the input +matrices. + +Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.pixelwise.bitwise_xor" + +@param src1 first input matrix. +@param src2 second input matrix. +*/ +GAPI_EXPORTS GMat bitwise_xor(const GMat& src1, const GMat& src2); +/** @overload +@note Function textual ID is "org.opencv.core.pixelwise.compare.bitwise_xorS" +@param src1 first input matrix. +@param src2 scalar, for which per-lemenet "logical or" operation on elements of src1 will be performed. +*/ +GAPI_EXPORTS GMat bitwise_xor(const GMat& src1, const GScalar& src2); + + +/** @brief Inverts every bit of an array. +The function bitwise_not calculates per-element bit-wise inversion of the input +matrix: +\f[\texttt{dst} (I) = \neg \texttt{src} (I)\f] + +In case of floating-point matrices, their machine-specific bit +representations (usually IEEE754-compliant) are used for the operation. +In case of multi-channel matrices, each channel is processed +independently. Output matrix must have the same size and depth as the input +matrix. + +Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.pixelwise.bitwise_not" + +@param src input matrix. +*/ +GAPI_EXPORTS GMat bitwise_not(const GMat& src); + +/** @brief Select values from either first or second of input matrices by given mask. +The function set to the output matrix either the value from the first input matrix if corresponding value of mask matrix is 255, + or value from the second input matrix (if value of mask matrix set to 0). + +Input mask matrix must be of @ref CV_8UC1 type, two other inout matrices and output matrix should be of the same type. The size should +be the same for all input and output matrices. +Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.pixelwise.select" + +@param src1 first input matrix. +@param src2 second input matrix. +@param mask mask input matrix. +*/ +GAPI_EXPORTS GMat select(const GMat& src1, const GMat& src2, const GMat& mask); + +//! @} gapi_pixelwise + + +//! @addtogroup gapi_matrixop +//! @{ +/** @brief Calculates per-element minimum of two matrices. + +The function min calculates the per-element minimum of two matrices of the same size, number of channels and depth: +\f[\texttt{dst} (I)= \min ( \texttt{src1} (I), \texttt{src2} (I))\f] + where I is a multi-dimensional index of matrix elements. In case of + multi-channel matrices, each channel is processed independently. +Output matrix must be of the same size and depth as src1. + +Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.matrixop.min" +@param src1 first input matrix. +@param src2 second input matrix of the same size and depth as src1. +@sa max, compareEqual, compareLess, compareLessEqual +*/ +GAPI_EXPORTS GMat min(const GMat& src1, const GMat& src2); + +/** @brief Calculates per-element maximum of two matrices. + +The function max calculates the per-element maximum of two matrices of the same size, number of channels and depth: +\f[\texttt{dst} (I)= \max ( \texttt{src1} (I), \texttt{src2} (I))\f] + where I is a multi-dimensional index of matrix elements. In case of + multi-channel matrices, each channel is processed independently. +Output matrix must be of the same size and depth as src1. + +Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.matrixop.max" +@param src1 first input matrix. +@param src2 second input matrix of the same size and depth as src1. +@sa min, compare, compareEqual, compareGreater, compareGreaterEqual +*/ +GAPI_EXPORTS GMat max(const GMat& src1, const GMat& src2); + +/** @brief Calculates the per-element absolute difference between two matrices. + +The function absDiff calculates absolute difference between two matrices of the same size and depth: + \f[\texttt{dst}(I) = \texttt{saturate} (| \texttt{src1}(I) - \texttt{src2}(I)|)\f] + where I is a multi-dimensional index of matrix elements. In case of + multi-channel matrices, each channel is processed independently. +Output matrix must have the same size and depth as input matrices. + +Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.matrixop.absdiff" +@param src1 first input matrix. +@param src2 second input matrix. +@sa abs +*/ +GAPI_EXPORTS GMat absDiff(const GMat& src1, const GMat& src2); + +/** @brief Calculates absolute value of matrix elements. + +The function abs calculates absolute difference between matrix elements and given scalar value: + \f[\texttt{dst}(I) = \texttt{saturate} (| \texttt{src1}(I) - \texttt{matC}(I)|)\f] + where matC is constructed from given scalar c and has the same sizes and depth as input matrix src. + +Output matrix must be of the same size and depth as src. + +Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.matrixop.absdiffC" +@param src input matrix. +@param c scalar to be subtracted. +@sa min, max +*/ +GAPI_EXPORTS GMat absDiffC(const GMat& src, const GScalar& c); + +/** @brief Calculates sum of all matrix elements. + +The function sum calculates sum of all matrix elements, independently for each channel. + +Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.matrixop.sum" +@param src input matrix. +@sa min, max +*/ +GAPI_EXPORTS GScalar sum(const GMat& src); + +/** @brief Calculates the weighted sum of two matrices. + +The function addWeighted calculates the weighted sum of two matrices as follows: +\f[\texttt{dst} (I)= \texttt{saturate} ( \texttt{src1} (I)* \texttt{alpha} + \texttt{src2} (I)* \texttt{beta} + \texttt{gamma} )\f] +where I is a multi-dimensional index of array elements. In case of multi-channel matrices, each +channel is processed independently. + +The function can be replaced with a matrix expression: + \f[\texttt{dst}(I) = \texttt{alpha} * \texttt{src1}(I) - \texttt{beta} * \texttt{src2}(I) + \texttt{gamma} \f] + +Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.matrixop.addweighted" +@param src1 first input matrix. +@param alpha weight of the first matrix elements. +@param src2 second input matrix of the same size and channel number as src1. +@param beta weight of the second matrix elements. +@param gamma scalar added to each sum. +@param ddepth optional depth of the output matrix. +@sa add, sub +*/ +GAPI_EXPORTS GMat addWeighted(const GMat& src1, double alpha, const GMat& src2, double beta, double gamma, int ddepth = -1); + +/** @brief Calculates the absolute L1 norm of a matrix. + +This version of normL1 calculates the absolute L1 norm of src. + +As example for one array consider the function \f$r(x)= \begin{pmatrix} x \\ 1-x \end{pmatrix}, x \in [-1;1]\f$. +The \f$ L_{1} \f$ norm for the sample value \f$r(-1) = \begin{pmatrix} -1 \\ 2 \end{pmatrix}\f$ +is calculated as follows +\f{align*} + \| r(-1) \|_{L_1} &= |-1| + |2| = 3 \\ +\f} +and for \f$r(0.5) = \begin{pmatrix} 0.5 \\ 0.5 \end{pmatrix}\f$ the calculation is +\f{align*} + \| r(0.5) \|_{L_1} &= |0.5| + |0.5| = 1 \\ +\f} + +Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.matrixop.norml1" +@param src input matrix. +@sa normL2, normInf +*/ +GAPI_EXPORTS GScalar normL1(const GMat& src); + +/** @brief Calculates the absolute L2 norm of a matrix. + +This version of normL2 calculates the absolute L2 norm of src. + +As example for one array consider the function \f$r(x)= \begin{pmatrix} x \\ 1-x \end{pmatrix}, x \in [-1;1]\f$. +The \f$ L_{2} \f$ norm for the sample value \f$r(-1) = \begin{pmatrix} -1 \\ 2 \end{pmatrix}\f$ +is calculated as follows +\f{align*} + \| r(-1) \|_{L_2} &= \sqrt{(-1)^{2} + (2)^{2}} = \sqrt{5} \\ +\f} +and for \f$r(0.5) = \begin{pmatrix} 0.5 \\ 0.5 \end{pmatrix}\f$ the calculation is +\f{align*} + \| r(0.5) \|_{L_2} &= \sqrt{(0.5)^{2} + (0.5)^{2}} = \sqrt{0.5} \\ +\f} + +Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. +@note Function textual ID is "org.opencv.core.matrixop.norml2" +@param src input matrix. +@sa normL1, normInf +*/ +GAPI_EXPORTS GScalar normL2(const GMat& src); + +/** @brief Calculates the absolute infinite norm of a matrix. + +This version of normInf calculates the absolute infinite norm of src. + +As example for one array consider the function \f$r(x)= \begin{pmatrix} x \\ 1-x \end{pmatrix}, x \in [-1;1]\f$. +The \f$ L_{\infty} \f$ norm for the sample value \f$r(-1) = \begin{pmatrix} -1 \\ 2 \end{pmatrix}\f$ +is calculated as follows +\f{align*} + \| r(-1) \|_{L_\infty} &= \max(|-1|,|2|) = 2 +\f} +and for \f$r(0.5) = \begin{pmatrix} 0.5 \\ 0.5 \end{pmatrix}\f$ the calculation is +\f{align*} + \| r(0.5) \|_{L_\infty} &= \max(|0.5|,|0.5|) = 0.5. +\f} + +Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.core.matrixop.norminf" +@param src input matrix. +@sa normL1, normL2 +*/ +GAPI_EXPORTS GScalar normInf(const GMat& src); + +/** @brief Calculates the integral of an image. + +The function calculates one or more integral images for the source image as follows: + +\f[\texttt{sum} (X,Y) = \sum _{x integral(const GMat& src, int sdepth = -1, int sqdepth = -1); + +/** @brief Applies a fixed-level threshold to each matrix element. + +The function applies fixed-level thresholding to a single- or multiple-channel matrix. +The function is typically used to get a bi-level (binary) image out of a grayscale image ( cmp functions could be also used for +this purpose) or for removing a noise, that is, filtering out pixels with too small or too large +values. There are several types of thresholding supported by the function. They are determined by +type parameter. + +Also, the special values cv::THRESH_OTSU or cv::THRESH_TRIANGLE may be combined with one of the +above values. In these cases, the function determines the optimal threshold value using the Otsu's +or Triangle algorithm and uses it instead of the specified thresh . The function returns the +computed threshold value in addititon to thresholded matrix. +The Otsu's and Triangle methods are implemented only for 8-bit matrices. + +Input image should be single channel only in case of cv::THRESH_OTSU or cv::THRESH_TRIANGLE flags. +Output matrix must be of the same size and depth as src. + +@note Function textual ID is "org.opencv.core.matrixop.threshold" + +@param src input matrix (@ref CV_8UC1, @ref CV_8UC3, or @ref CV_32FC1). +@param thresh threshold value. +@param maxval maximum value to use with the cv::THRESH_BINARY and cv::THRESH_BINARY_INV thresholding +types. +@param type thresholding type (see the cv::ThresholdTypes). + +@sa min, max, cmpGT, cmpLE, cmpGE, cmpLS + */ +GAPI_EXPORTS GMat threshold(const GMat& src, const GScalar& thresh, const GScalar& maxval, int type); +/** @overload +This function applicable for all threshold types except CV_THRESH_OTSU and CV_THRESH_TRIANGLE +@note Function textual ID is "org.opencv.core.matrixop.thresholdOT" +*/ +GAPI_EXPORTS std::tuple threshold(const GMat& src, const GScalar& maxval, int type); + +/** @brief Applies a range-level threshold to each matrix element. + +The function applies range-level thresholding to a single- or multiple-channel matrix. +It sets output pixel value to OxFF if the corresponding pixel value of input matrix is in specified range,or 0 otherwise. + +Input and output matrices must be CV_8UC1. + +@note Function textual ID is "org.opencv.core.matrixop.inRange" + +@param src input matrix (CV_8UC1). +@param threshLow lower boundary value. +@param threshUp upper boundary value. + +@sa threshold + */ +GAPI_EXPORTS GMat inRange(const GMat& src, const GScalar& threshLow, const GScalar& threshUp); + +//! @} gapi_matrixop + +//! @addtogroup gapi_transform +//! @{ +/** @brief Resizes an image. + +The function resizes the image src down to or up to the specified size. + +Output image size will have the size dsize (when dsize is non-zero) or the size computed from +src.size(), fx, and fy; the depth of output is the same as of src. + +If you want to resize src so that it fits the pre-created dst, +you may call the function as follows: +@code + // explicitly specify dsize=dst.size(); fx and fy will be computed from that. + resize(src, dst, dst.size(), 0, 0, interpolation); +@endcode +If you want to decimate the image by factor of 2 in each direction, you can call the function this +way: +@code + // specify fx and fy and let the function compute the destination image size. + resize(src, dst, Size(), 0.5, 0.5, interpolation); +@endcode +To shrink an image, it will generally look best with cv::INTER_AREA interpolation, whereas to +enlarge an image, it will generally look best with cv::INTER_CUBIC (slow) or cv::INTER_LINEAR +(faster but still looks OK). + +@note Function textual ID is "org.opencv.core.transform.resize" + +@param src input image. +@param dsize output image size; if it equals zero, it is computed as: + \f[\texttt{dsize = Size(round(fx*src.cols), round(fy*src.rows))}\f] + Either dsize or both fx and fy must be non-zero. +@param fx scale factor along the horizontal axis; when it equals 0, it is computed as +\f[\texttt{(double)dsize.width/src.cols}\f] +@param fy scale factor along the vertical axis; when it equals 0, it is computed as +\f[\texttt{(double)dsize.height/src.rows}\f] +@param interpolation interpolation method, see cv::InterpolationFlags + +@sa warpAffine, warpPerspective, remap, resizeP + */ +GAPI_EXPORTS GMat resize(const GMat& src, const Size& dsize, double fx = 0, double fy = 0, int interpolation = INTER_LINEAR); + +/** @brief Resizes a planar image. + +The function resizes the image src down to or up to the specified size. +Planar image memory layout is three planes laying in the memory contiguously, +so the image height should be plane_height*plane_number, image type is @ref CV_8UC1. + +Output image size will have the size dsize, the depth of output is the same as of src. + +@note Function textual ID is "org.opencv.core.transform.resizeP" + +@param src input image, must be of @ref CV_8UC1 type; +@param dsize output image size; +@param interpolation interpolation method, only cv::INTER_LINEAR is supported at the moment + +@sa warpAffine, warpPerspective, remap, resize + */ +GAPI_EXPORTS GMatP resizeP(const GMatP& src, const Size& dsize, int interpolation = cv::INTER_LINEAR); + +/** @brief Creates one 3-channel (4-channel) matrix out of 3(4) single-channel ones. + +The function merges several matrices to make a single multi-channel matrix. That is, each +element of the output matrix will be a concatenation of the elements of the input matrices, where +elements of i-th input matrix are treated as mv[i].channels()-element vectors. +Input matrix must be of @ref CV_8UC3 (@ref CV_8UC4) type. + +The function split3/split4 does the reverse operation. + +@note Function textual ID for merge3 is "org.opencv.core.transform.merge3" +@note Function textual ID for merge4 is "org.opencv.core.transform.merge4" + +@param src1 first input matrix to be merged +@param src2 second input matrix to be merged +@param src3 third input matrix to be merged +@param src4 fourth input matrix to be merged +@sa split4, split3 +*/ +GAPI_EXPORTS GMat merge4(const GMat& src1, const GMat& src2, const GMat& src3, const GMat& src4); +GAPI_EXPORTS GMat merge3(const GMat& src1, const GMat& src2, const GMat& src3); + +/** @brief Divides a 3-channel (4-channel) matrix into 3(4) single-channel matrices. + +The function splits a 3-channel (4-channel) matrix into 3(4) single-channel matrices: +\f[\texttt{mv} [c](I) = \texttt{src} (I)_c\f] + +All output matrices must be in @ref CV_8UC1. + +@note Function textual for split3 ID is "org.opencv.core.transform.split3" +@note Function textual for split4 ID is "org.opencv.core.transform.split4" + +@param src input @ref CV_8UC4 (@ref CV_8UC3) matrix. +@sa merge3, merge4 +*/ +GAPI_EXPORTS std::tuple split4(const GMat& src); +GAPI_EXPORTS std::tuple split3(const GMat& src); + +/** @brief Applies a generic geometrical transformation to an image. + +The function remap transforms the source image using the specified map: + +\f[\texttt{dst} (x,y) = \texttt{src} (map_x(x,y),map_y(x,y))\f] + +where values of pixels with non-integer coordinates are computed using one of available +interpolation methods. \f$map_x\f$ and \f$map_y\f$ can be encoded as separate floating-point maps +in \f$map_1\f$ and \f$map_2\f$ respectively, or interleaved floating-point maps of \f$(x,y)\f$ in +\f$map_1\f$, or fixed-point maps created by using convertMaps. The reason you might want to +convert from floating to fixed-point representations of a map is that they can yield much faster +(\~2x) remapping operations. In the converted case, \f$map_1\f$ contains pairs (cvFloor(x), +cvFloor(y)) and \f$map_2\f$ contains indices in a table of interpolation coefficients. +Output image must be of the same size and depth as input one. + +@note Function textual ID is "org.opencv.core.transform.remap" + +@param src Source image. +@param map1 The first map of either (x,y) points or just x values having the type CV_16SC2, +CV_32FC1, or CV_32FC2. +@param map2 The second map of y values having the type CV_16UC1, CV_32FC1, or none (empty map +if map1 is (x,y) points), respectively. +@param interpolation Interpolation method (see cv::InterpolationFlags). The method INTER_AREA is +not supported by this function. +@param borderMode Pixel extrapolation method (see cv::BorderTypes). When +borderMode=BORDER_TRANSPARENT, it means that the pixels in the destination image that +corresponds to the "outliers" in the source image are not modified by the function. +@param borderValue Value used in case of a constant border. By default, it is 0. +@note +Due to current implementation limitations the size of an input and output images should be less than 32767x32767. + */ +GAPI_EXPORTS GMat remap(const GMat& src, const Mat& map1, const Mat& map2, + int interpolation, int borderMode = BORDER_CONSTANT, + const Scalar& borderValue = Scalar()); + +/** @brief Flips a 2D matrix around vertical, horizontal, or both axes. + +The function flips the matrix in one of three different ways (row +and column indices are 0-based): +\f[\texttt{dst} _{ij} = +\left\{ +\begin{array}{l l} +\texttt{src} _{\texttt{src.rows}-i-1,j} & if\; \texttt{flipCode} = 0 \\ +\texttt{src} _{i, \texttt{src.cols} -j-1} & if\; \texttt{flipCode} > 0 \\ +\texttt{src} _{ \texttt{src.rows} -i-1, \texttt{src.cols} -j-1} & if\; \texttt{flipCode} < 0 \\ +\end{array} +\right.\f] +The example scenarios of using the function are the following: +* Vertical flipping of the image (flipCode == 0) to switch between + top-left and bottom-left image origin. This is a typical operation + in video processing on Microsoft Windows\* OS. +* Horizontal flipping of the image with the subsequent horizontal + shift and absolute difference calculation to check for a + vertical-axis symmetry (flipCode \> 0). +* Simultaneous horizontal and vertical flipping of the image with + the subsequent shift and absolute difference calculation to check + for a central symmetry (flipCode \< 0). +* Reversing the order of point arrays (flipCode \> 0 or + flipCode == 0). +Output image must be of the same depth as input one, size should be correct for given flipCode. + +@note Function textual ID is "org.opencv.core.transform.flip" + +@param src input matrix. +@param flipCode a flag to specify how to flip the array; 0 means +flipping around the x-axis and positive value (for example, 1) means +flipping around y-axis. Negative value (for example, -1) means flipping +around both axes. +@sa remap +*/ +GAPI_EXPORTS GMat flip(const GMat& src, int flipCode); + +/** @brief Crops a 2D matrix. + +The function crops the matrix by given cv::Rect. + +Output matrix must be of the same depth as input one, size is specified by given rect size. + +@note Function textual ID is "org.opencv.core.transform.crop" + +@param src input matrix. +@param rect a rect to crop a matrix to +@sa resize +*/ +GAPI_EXPORTS GMat crop(const GMat& src, const Rect& rect); + +/** @brief Copies a matrix. + +Copies an input array. Works as a regular Mat::clone but happens in-graph. +Mainly is used to workaround some existing limitations (e.g. to forward an input frame to outputs +in the streaming mode). Will be deprecated and removed in the future. + +@note Function textual ID is "org.opencv.core.transform.copy" + +@param src input matrix. +@sa crop +*/ +GAPI_EXPORTS GMat copy(const GMat& src); + +/** @brief Applies horizontal concatenation to given matrices. + +The function horizontally concatenates two GMat matrices (with the same number of rows). +@code{.cpp} + GMat A = { 1, 4, + 2, 5, + 3, 6 }; + GMat B = { 7, 10, + 8, 11, + 9, 12 }; + + GMat C = gapi::concatHor(A, B); + //C: + //[1, 4, 7, 10; + // 2, 5, 8, 11; + // 3, 6, 9, 12] +@endcode +Output matrix must the same number of rows and depth as the src1 and src2, and the sum of cols of the src1 and src2. +Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.imgproc.transform.concatHor" + +@param src1 first input matrix to be considered for horizontal concatenation. +@param src2 second input matrix to be considered for horizontal concatenation. +@sa concatVert +*/ +GAPI_EXPORTS GMat concatHor(const GMat& src1, const GMat& src2); + +/** @overload +The function horizontally concatenates given number of GMat matrices (with the same number of columns). +Output matrix must the same number of columns and depth as the input matrices, and the sum of rows of input matrices. + +@param v vector of input matrices to be concatenated horizontally. +*/ +GAPI_EXPORTS GMat concatHor(const std::vector &v); + +/** @brief Applies vertical concatenation to given matrices. + +The function vertically concatenates two GMat matrices (with the same number of cols). + @code{.cpp} + GMat A = { 1, 7, + 2, 8, + 3, 9 }; + GMat B = { 4, 10, + 5, 11, + 6, 12 }; + + GMat C = gapi::concatVert(A, B); + //C: + //[1, 7; + // 2, 8; + // 3, 9; + // 4, 10; + // 5, 11; + // 6, 12] + @endcode + +Output matrix must the same number of cols and depth as the src1 and src2, and the sum of rows of the src1 and src2. +Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. + +@note Function textual ID is "org.opencv.imgproc.transform.concatVert" + +@param src1 first input matrix to be considered for vertical concatenation. +@param src2 second input matrix to be considered for vertical concatenation. +@sa concatHor +*/ +GAPI_EXPORTS GMat concatVert(const GMat& src1, const GMat& src2); + +/** @overload +The function vertically concatenates given number of GMat matrices (with the same number of columns). +Output matrix must the same number of columns and depth as the input matrices, and the sum of rows of input matrices. + +@param v vector of input matrices to be concatenated vertically. +*/ +GAPI_EXPORTS GMat concatVert(const std::vector &v); + + +/** @brief Performs a look-up table transform of a matrix. + +The function LUT fills the output matrix with values from the look-up table. Indices of the entries +are taken from the input matrix. That is, the function processes each element of src as follows: +\f[\texttt{dst} (I) \leftarrow \texttt{lut(src(I))}\f] + +Supported matrix data types are @ref CV_8UC1. +Output is a matrix of the same size and number of channels as src, and the same depth as lut. + +@note Function textual ID is "org.opencv.core.transform.LUT" + +@param src input matrix of 8-bit elements. +@param lut look-up table of 256 elements; in case of multi-channel input array, the table should +either have a single channel (in this case the same table is used for all channels) or the same +number of channels as in the input matrix. +*/ +GAPI_EXPORTS GMat LUT(const GMat& src, const Mat& lut); + +/** @brief Converts a matrix to another data depth with optional scaling. + +The method converts source pixel values to the target data depth. saturate_cast\<\> is applied at +the end to avoid possible overflows: + +\f[m(x,y) = saturate \_ cast( \alpha (*this)(x,y) + \beta )\f] +Output matrix must be of the same size as input one. + +@note Function textual ID is "org.opencv.core.transform.convertTo" +@param src input matrix to be converted from. +@param rdepth desired output matrix depth or, rather, the depth since the number of channels are the +same as the input has; if rdepth is negative, the output matrix will have the same depth as the input. +@param alpha optional scale factor. +@param beta optional delta added to the scaled values. + */ +GAPI_EXPORTS GMat convertTo(const GMat& src, int rdepth, double alpha=1, double beta=0); + +/** @brief Normalizes the norm or value range of an array. + +The function normalizes scale and shift the input array elements so that +\f[\| \texttt{dst} \| _{L_p}= \texttt{alpha}\f] +(where p=Inf, 1 or 2) when normType=NORM_INF, NORM_L1, or NORM_L2, respectively; or so that +\f[\min _I \texttt{dst} (I)= \texttt{alpha} , \, \, \max _I \texttt{dst} (I)= \texttt{beta}\f] +when normType=NORM_MINMAX (for dense arrays only). + +@note Function textual ID is "org.opencv.core.normalize" + +@param src input array. +@param alpha norm value to normalize to or the lower range boundary in case of the range +normalization. +@param beta upper range boundary in case of the range normalization; it is not used for the norm +normalization. +@param norm_type normalization type (see cv::NormTypes). +@param ddepth when negative, the output array has the same type as src; otherwise, it has the same +number of channels as src and the depth =ddepth. +@sa norm, Mat::convertTo +*/ +GAPI_EXPORTS GMat normalize(const GMat& src, double alpha, double beta, + int norm_type, int ddepth = -1); + +/** @brief Applies a perspective transformation to an image. + +The function warpPerspective transforms the source image using the specified matrix: + +\f[\texttt{dst} (x,y) = \texttt{src} \left ( \frac{M_{11} x + M_{12} y + M_{13}}{M_{31} x + M_{32} y + M_{33}} , + \frac{M_{21} x + M_{22} y + M_{23}}{M_{31} x + M_{32} y + M_{33}} \right )\f] + +when the flag #WARP_INVERSE_MAP is set. Otherwise, the transformation is first inverted with invert +and then put in the formula above instead of M. The function cannot operate in-place. + +@param src input image. +@param M \f$3\times 3\f$ transformation matrix. +@param dsize size of the output image. +@param flags combination of interpolation methods (#INTER_LINEAR or #INTER_NEAREST) and the +optional flag #WARP_INVERSE_MAP, that sets M as the inverse transformation ( +\f$\texttt{dst}\rightarrow\texttt{src}\f$ ). +@param borderMode pixel extrapolation method (#BORDER_CONSTANT or #BORDER_REPLICATE). +@param borderValue value used in case of a constant border; by default, it equals 0. + +@sa warpAffine, resize, remap, getRectSubPix, perspectiveTransform + */ +GAPI_EXPORTS GMat warpPerspective(const GMat& src, const Mat& M, const Size& dsize, int flags = cv::INTER_LINEAR, + int borderMode = cv::BORDER_CONSTANT, const Scalar& borderValue = Scalar()); + +/** @brief Applies an affine transformation to an image. + +The function warpAffine transforms the source image using the specified matrix: + +\f[\texttt{dst} (x,y) = \texttt{src} ( \texttt{M} _{11} x + \texttt{M} _{12} y + \texttt{M} _{13}, \texttt{M} _{21} x + \texttt{M} _{22} y + \texttt{M} _{23})\f] + +when the flag #WARP_INVERSE_MAP is set. Otherwise, the transformation is first inverted +with #invertAffineTransform and then put in the formula above instead of M. The function cannot +operate in-place. + +@param src input image. +@param M \f$2\times 3\f$ transformation matrix. +@param dsize size of the output image. +@param flags combination of interpolation methods (see #InterpolationFlags) and the optional +flag #WARP_INVERSE_MAP that means that M is the inverse transformation ( +\f$\texttt{dst}\rightarrow\texttt{src}\f$ ). +@param borderMode pixel extrapolation method (see #BorderTypes); +borderMode=#BORDER_TRANSPARENT isn't supported +@param borderValue value used in case of a constant border; by default, it is 0. + +@sa warpPerspective, resize, remap, getRectSubPix, transform + */ +GAPI_EXPORTS GMat warpAffine(const GMat& src, const Mat& M, const Size& dsize, int flags = cv::INTER_LINEAR, + int borderMode = cv::BORDER_CONSTANT, const Scalar& borderValue = Scalar()); +//! @} gapi_transform + +} //namespace gapi +} //namespace cv + +#endif //OPENCV_GAPI_CORE_HPP diff --git a/IPL/include/opencv/opencv2/gapi/cpu/core.hpp b/IPL/include/opencv/opencv2/gapi/cpu/core.hpp new file mode 100644 index 0000000..ffd3596 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/cpu/core.hpp @@ -0,0 +1,27 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_CPU_CORE_API_HPP +#define OPENCV_GAPI_CPU_CORE_API_HPP + +#include // GKernelPackage +#include // GAPI_EXPORTS + +namespace cv { +namespace gapi { +namespace core { +namespace cpu { + +GAPI_EXPORTS GKernelPackage kernels(); + +} // namespace cpu +} // namespace core +} // namespace gapi +} // namespace cv + + +#endif // OPENCV_GAPI_CPU_CORE_API_HPP diff --git a/IPL/include/opencv/opencv2/gapi/cpu/gcpukernel.hpp b/IPL/include/opencv/opencv2/gapi/cpu/gcpukernel.hpp new file mode 100644 index 0000000..764e085 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/cpu/gcpukernel.hpp @@ -0,0 +1,385 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018-2019 Intel Corporation + + +#ifndef OPENCV_GAPI_GCPUKERNEL_HPP +#define OPENCV_GAPI_GCPUKERNEL_HPP + +#include +#include +#include +#include + +#include +#include +#include +#include +#include //to_ocv +#include //suppress_unused_warning +#include + +// FIXME: namespace scheme for backends? +namespace cv { + +namespace gimpl +{ + // Forward-declare an internal class + class GCPUExecutable; + + namespace render + { + namespace ocv + { + class GRenderExecutable; + } + } +} // namespace gimpl + +namespace gapi +{ +namespace cpu +{ + /** + * \addtogroup gapi_std_backends + * @{ + * + * @brief G-API backends available in this OpenCV version + * + * G-API backends play a corner stone role in G-API execution + * stack. Every backend is hardware-oriented and thus can run its + * kernels efficiently on the target platform. + * + * Backends are usually "black boxes" for G-API users -- on the API + * side, all backends are represented as different objects of the + * same class cv::gapi::GBackend. + * User can manipulate with backends by specifying which kernels to use. + * + * @sa @ref gapi_hld + */ + + /** + * @brief Get a reference to CPU (OpenCV) backend. + * + * This is the default backend in G-API at the moment, providing + * broader functional coverage but losing some graph model + * advantages. Provided mostly for reference and prototyping + * purposes. + * + * @sa gapi_std_backends + */ + GAPI_EXPORTS cv::gapi::GBackend backend(); + /** @} */ + + class GOCVFunctor; + + //! @cond IGNORED + template + GOCVFunctor ocv_kernel(const Callable& c); + + template + GOCVFunctor ocv_kernel(Callable& c); + //! @endcond + +} // namespace cpu +} // namespace gapi + +// Represents arguments which are passed to a wrapped CPU function +// FIXME: put into detail? +class GAPI_EXPORTS GCPUContext +{ +public: + // Generic accessor API + template + const T& inArg(int input) { return m_args.at(input).get(); } + + // Syntax sugar + const cv::gapi::own::Mat& inMat(int input); + cv::gapi::own::Mat& outMatR(int output); // FIXME: Avoid cv::gapi::own::Mat m = ctx.outMatR() + + const cv::Scalar& inVal(int input); + cv::Scalar& outValR(int output); // FIXME: Avoid cv::Scalar s = ctx.outValR() + template std::vector& outVecR(int output) // FIXME: the same issue + { + return outVecRef(output).wref(); + } + template T& outOpaqueR(int output) // FIXME: the same issue + { + return outOpaqueRef(output).wref(); + } + +protected: + detail::VectorRef& outVecRef(int output); + detail::OpaqueRef& outOpaqueRef(int output); + + std::vector m_args; + + //FIXME: avoid conversion of arguments from internal representation to OpenCV one on each call + //to OCV kernel. (This can be achieved by a two single time conversions in GCPUExecutable::run, + //once on enter for input and output arguments, and once before return for output arguments only + std::unordered_map m_results; + + friend class gimpl::GCPUExecutable; + friend class gimpl::render::ocv::GRenderExecutable; +}; + +class GAPI_EXPORTS GCPUKernel +{ +public: + // This function is kernel's execution entry point (does the processing work) + using F = std::function; + + GCPUKernel(); + explicit GCPUKernel(const F& f); + + void apply(GCPUContext &ctx); + +protected: + F m_f; +}; + +// FIXME: This is an ugly ad-hoc implementation. TODO: refactor + +namespace detail +{ +template struct get_in; +template<> struct get_in +{ + static cv::Mat get(GCPUContext &ctx, int idx) { return to_ocv(ctx.inMat(idx)); } +}; +template<> struct get_in +{ + static cv::Mat get(GCPUContext &ctx, int idx) { return get_in::get(ctx, idx); } +}; +template<> struct get_in +{ + static cv::Mat get(GCPUContext &ctx, int idx) { return get_in::get(ctx, idx); } +}; +template<> struct get_in +{ + static cv::Scalar get(GCPUContext &ctx, int idx) { return ctx.inVal(idx); } +}; +template struct get_in > +{ + static const std::vector& get(GCPUContext &ctx, int idx) { return ctx.inArg(idx).rref(); } +}; +template struct get_in > +{ + static const U& get(GCPUContext &ctx, int idx) { return ctx.inArg(idx).rref(); } +}; + +//FIXME(dm): GArray/GArray conversion should be done more gracefully in the system +template<> struct get_in >: public get_in > +{ +}; + +//FIXME(dm): GArray/GArray conversion should be done more gracefully in the system +template<> struct get_in >: public get_in > +{ +}; + +//FIXME(dm): GOpaque/GOpaque conversion should be done more gracefully in the system +template<> struct get_in >: public get_in > +{ +}; + +//FIXME(dm): GOpaque/GOpaque conversion should be done more gracefully in the system +template<> struct get_in >: public get_in > +{ +}; + +template struct get_in +{ + static T get(GCPUContext &ctx, int idx) { return ctx.inArg(idx); } +}; + +struct tracked_cv_mat{ + tracked_cv_mat(cv::gapi::own::Mat& m) : r{to_ocv(m)}, original_data{m.data} {} + cv::Mat r; + uchar* original_data; + + operator cv::Mat& (){ return r;} + void validate() const{ + if (r.data != original_data) + { + util::throw_error + (std::logic_error + ("OpenCV kernel output parameter was reallocated. \n" + "Incorrect meta data was provided ?")); + } + } +}; + +template +void postprocess(Outputs&... outs) +{ + struct + { + void operator()(tracked_cv_mat* bm) { bm->validate(); } + void operator()(...) { } + + } validate; + //dummy array to unfold parameter pack + int dummy[] = { 0, (validate(&outs), 0)... }; + cv::util::suppress_unused_warning(dummy); +} + +template struct get_out; +template<> struct get_out +{ + static tracked_cv_mat get(GCPUContext &ctx, int idx) + { + auto& r = ctx.outMatR(idx); + return {r}; + } +}; +template<> struct get_out +{ + static tracked_cv_mat get(GCPUContext &ctx, int idx) + { + return get_out::get(ctx, idx); + } +}; +template<> struct get_out +{ + static cv::Scalar& get(GCPUContext &ctx, int idx) + { + return ctx.outValR(idx); + } +}; +template struct get_out> +{ + static std::vector& get(GCPUContext &ctx, int idx) + { + return ctx.outVecR(idx); + } +}; +template struct get_out> +{ + static U& get(GCPUContext &ctx, int idx) + { + return ctx.outOpaqueR(idx); + } +}; + +template +struct OCVCallHelper; + +// FIXME: probably can be simplified with std::apply or analogue. +template +struct OCVCallHelper, std::tuple > +{ + template + struct call_and_postprocess + { + template + static void call(Inputs&&... ins, Outputs&&... outs) + { + //not using a std::forward on outs is deliberate in order to + //cause compilation error, by trying to bind rvalue references to lvalue references + Impl::run(std::forward(ins)..., outs...); + postprocess(outs...); + } + + template + static void call(Impl& impl, Inputs&&... ins, Outputs&&... outs) + { + impl(std::forward(ins)..., outs...); + } + }; + + template + static void call_impl(GCPUContext &ctx, detail::Seq, detail::Seq) + { + //Make sure that OpenCV kernels do not reallocate memory for output parameters + //by comparing it's state (data ptr) before and after the call. + //This is done by converting each output Mat into tracked_cv_mat object, and binding + //them to parameters of ad-hoc function + //Convert own::Scalar to cv::Scalar before call kernel and run kernel + //convert cv::Scalar to own::Scalar after call kernel and write back results + call_and_postprocess::get(ctx, IIs))...> + ::call(get_in::get(ctx, IIs)..., + get_out::get(ctx, OIs)...); + } + + template + static void call_impl(cv::GCPUContext &ctx, Impl& impl, detail::Seq, detail::Seq) + { + call_and_postprocess::get(ctx, IIs))...> + ::call(impl, cv::detail::get_in::get(ctx, IIs)..., + cv::detail::get_out::get(ctx, OIs)...); + } + + static void call(GCPUContext &ctx) + { + call_impl(ctx, + typename detail::MkSeq::type(), + typename detail::MkSeq::type()); + } + + // NB: Same as call but calling the object + // This necessary for kernel implementations that have a state + // and are represented as an object + static void callFunctor(cv::GCPUContext &ctx, Impl& impl) + { + call_impl(ctx, impl, + typename detail::MkSeq::type(), + typename detail::MkSeq::type()); + } +}; + +} // namespace detail + +template +class GCPUKernelImpl: public cv::detail::OCVCallHelper, + public cv::detail::KernelTag +{ + using P = detail::OCVCallHelper; + +public: + using API = K; + + static cv::gapi::GBackend backend() { return cv::gapi::cpu::backend(); } + static cv::GCPUKernel kernel() { return GCPUKernel(&P::call); } +}; + +#define GAPI_OCV_KERNEL(Name, API) struct Name: public cv::GCPUKernelImpl + +class gapi::cpu::GOCVFunctor : public gapi::GFunctor +{ +public: + using Impl = std::function; + + GOCVFunctor(const char* id, const Impl& impl) + : gapi::GFunctor(id), impl_{GCPUKernel(impl)} + { + } + + GKernelImpl impl() const override { return impl_; } + gapi::GBackend backend() const override { return gapi::cpu::backend(); } + +private: + GKernelImpl impl_; +}; + +//! @cond IGNORED +template +gapi::cpu::GOCVFunctor gapi::cpu::ocv_kernel(Callable& c) +{ + using P = detail::OCVCallHelper; + return GOCVFunctor(K::id(), std::bind(&P::callFunctor, std::placeholders::_1, std::ref(c))); +} + +template +gapi::cpu::GOCVFunctor gapi::cpu::ocv_kernel(const Callable& c) +{ + using P = detail::OCVCallHelper; + return GOCVFunctor(K::id(), std::bind(&P::callFunctor, std::placeholders::_1, c)); +} +//! @endcond + +} // namespace cv + +#endif // OPENCV_GAPI_GCPUKERNEL_HPP diff --git a/IPL/include/opencv/opencv2/gapi/cpu/imgproc.hpp b/IPL/include/opencv/opencv2/gapi/cpu/imgproc.hpp new file mode 100644 index 0000000..0b96db0 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/cpu/imgproc.hpp @@ -0,0 +1,27 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_CPU_IMGPROC_API_HPP +#define OPENCV_GAPI_CPU_IMGPROC_API_HPP + +#include // GAPI_EXPORTS +#include // GKernelPackage + +namespace cv { +namespace gapi { +namespace imgproc { +namespace cpu { + +GAPI_EXPORTS GKernelPackage kernels(); + +} // namespace cpu +} // namespace imgproc +} // namespace gapi +} // namespace cv + + +#endif // OPENCV_GAPI_CPU_IMGPROC_API_HPP diff --git a/IPL/include/opencv/opencv2/gapi/cpu/video.hpp b/IPL/include/opencv/opencv2/gapi/cpu/video.hpp new file mode 100644 index 0000000..d3c1f2e --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/cpu/video.hpp @@ -0,0 +1,25 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2020 Intel Corporation + +#ifndef OPENCV_GAPI_CPU_VIDEO_API_HPP +#define OPENCV_GAPI_CPU_VIDEO_API_HPP + +#include // GKernelPackage + +namespace cv { +namespace gapi { +namespace video { +namespace cpu { + +GAPI_EXPORTS GKernelPackage kernels(); + +} // namespace cpu +} // namespace video +} // namespace gapi +} // namespace cv + + +#endif // OPENCV_GAPI_CPU_VIDEO_API_HPP diff --git a/IPL/include/opencv/opencv2/gapi/fluid/core.hpp b/IPL/include/opencv/opencv2/gapi/fluid/core.hpp new file mode 100644 index 0000000..8c21f57 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/fluid/core.hpp @@ -0,0 +1,20 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_FLUID_CORE_HPP +#define OPENCV_GAPI_FLUID_CORE_HPP + +#include // GKernelPackage +#include // GAPI_EXPORTS + +namespace cv { namespace gapi { namespace core { namespace fluid { + +GAPI_EXPORTS GKernelPackage kernels(); + +}}}} + +#endif // OPENCV_GAPI_FLUID_CORE_HPP diff --git a/IPL/include/opencv/opencv2/gapi/fluid/gfluidbuffer.hpp b/IPL/include/opencv/opencv2/gapi/fluid/gfluidbuffer.hpp new file mode 100644 index 0000000..02e74f2 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/fluid/gfluidbuffer.hpp @@ -0,0 +1,157 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_FLUID_BUFFER_HPP +#define OPENCV_GAPI_FLUID_BUFFER_HPP + +#include +#include // accumulate +#include // ostream +#include // uint8_t + +#include +#include +#include + +#include +#include + +namespace cv { +namespace gapi { +namespace fluid { + +struct Border +{ + // This constructor is required to support existing kernels which are part of G-API + Border(int _type, cv::Scalar _val) : type(_type), value(_val) {}; + + int type; + cv::Scalar value; +}; + +using BorderOpt = util::optional; + +bool operator == (const Border& b1, const Border& b2); + +class GAPI_EXPORTS Buffer; + +class GAPI_EXPORTS View +{ +public: + struct Cache + { + std::vector m_linePtrs; + GMatDesc m_desc; + int m_border_size = 0; + + inline const uint8_t* linePtr(int index) const + { + // "out_of_window" check: + // user must not request the lines which are outside of specified kernel window + GAPI_DbgAssert(index >= -m_border_size + && index < -m_border_size + static_cast(m_linePtrs.size())); + return m_linePtrs[index + m_border_size]; + } + }; + + View() = default; + + const inline uint8_t* InLineB(int index) const // -(w-1)/2...0...+(w-1)/2 for Filters + { + return m_cache->linePtr(index); + } + + template const inline T* InLine(int i) const + { + const uint8_t* ptr = this->InLineB(i); + return reinterpret_cast(ptr); + } + + inline operator bool() const { return m_priv != nullptr; } + bool ready() const; + inline int length() const { return m_cache->m_desc.size.width; } + int y() const; + + inline const GMatDesc& meta() const { return m_cache->m_desc; } + + class GAPI_EXPORTS Priv; // internal use only + Priv& priv(); // internal use only + const Priv& priv() const; // internal use only + + View(std::unique_ptr&& p); + View(View&& v); + View& operator=(View&& v); + ~View(); + +private: + std::unique_ptr m_priv; + const Cache* m_cache; +}; + +class GAPI_EXPORTS Buffer +{ +public: + struct Cache + { + std::vector m_linePtrs; + GMatDesc m_desc; + }; + + // Default constructor (executable creation stage, + // all following initialization performed in Priv::init()) + Buffer(); + // Scratch constructor (user kernels) + Buffer(const cv::GMatDesc &desc); + + // Constructor for intermediate buffers (for tests) + Buffer(const cv::GMatDesc &desc, + int max_line_consumption, int border_size, + int skew, + int wlpi, + BorderOpt border); + // Constructor for in/out buffers (for tests) + Buffer(const cv::gapi::own::Mat &data, bool is_input); + ~Buffer(); + Buffer& operator=(Buffer&&); + + inline uint8_t* OutLineB(int index = 0) + { + return m_cache->m_linePtrs[index]; + } + + template inline T* OutLine(int index = 0) + { + uint8_t* ptr = this->OutLineB(index); + return reinterpret_cast(ptr); + } + + int y() const; + + int linesReady() const; + void debug(std::ostream &os) const; + inline int length() const { return m_cache->m_desc.size.width; } + int lpi() const; // LPI for WRITER + + inline const GMatDesc& meta() const { return m_cache->m_desc; } + + View mkView(int borderSize, bool ownStorage); + void addView(const View* v); + + class GAPI_EXPORTS Priv; // internal use only + Priv& priv(); // internal use only + const Priv& priv() const; // internal use only + +private: + std::unique_ptr m_priv; + const Cache* m_cache; +}; + +} // namespace cv::gapi::fluid +} // namespace cv::gapi +} // namespace cv + +#endif // OPENCV_GAPI_FLUID_BUFFER_HPP diff --git a/IPL/include/opencv/opencv2/gapi/fluid/gfluidkernel.hpp b/IPL/include/opencv/opencv2/gapi/fluid/gfluidkernel.hpp new file mode 100644 index 0000000..dbb36d8 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/fluid/gfluidkernel.hpp @@ -0,0 +1,439 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018-2019 Intel Corporation + + +#ifndef OPENCV_GAPI_FLUID_KERNEL_HPP +#define OPENCV_GAPI_FLUID_KERNEL_HPP + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +// FIXME: namespace scheme for backends? +namespace cv { + +namespace gapi +{ +namespace fluid +{ + /** + * \addtogroup gapi_std_backends G-API Standard Backends + * @{ + */ + /** + * @brief Get a reference to Fluid backend. + * + * @sa gapi_std_backends + */ + GAPI_EXPORTS cv::gapi::GBackend backend(); + /** @} */ +} // namespace fluid +} // namespace gapi + + +class GAPI_EXPORTS GFluidKernel +{ +public: + enum class Kind + { + Filter, + Resize, + YUV420toRGB //Color conversion of 4:2:0 chroma sub-sampling formats (NV12, I420 ..etc) to RGB + }; + + // This function is a generic "doWork" callback + using F = std::function &)>; + + // This function is a generic "initScratch" callback + using IS = std::function; + + // This function is a generic "resetScratch" callback + using RS = std::function; + + // This function describes kernel metadata inference rule. + using M = std::function; + + // This function is a generic "getBorder" callback (extracts border-related data from kernel's input parameters) + using B = std::function; + + // This function is a generic "getWindow" callback (extracts window-related data from kernel's input parameters) + using GW = std::function; + + // FIXME: move implementations out of header file + GFluidKernel() {} + GFluidKernel(Kind k, int l, bool scratch, const F& f, const IS &is, const RS &rs, const B& b, const GW& win) + : m_kind(k) + , m_lpi(l) + , m_scratch(scratch) + , m_f(f) + , m_is(is) + , m_rs(rs) + , m_b(b) + , m_gw(win) {} + + Kind m_kind; + const int m_lpi = -1; + const bool m_scratch = false; + + const F m_f; + const IS m_is; + const RS m_rs; + const B m_b; + const GW m_gw; +}; + +// FIXME!!! +// This is the temporary and experimental API +// which should be replaced by runtime roi-based scheduling +/** \addtogroup gapi_compile_args + * @{ + */ +/** + * @brief This structure allows to control the output image region + * which Fluid backend will produce in the graph. + * + * This feature is useful for external tiling and parallelism, but + * will be deprecated in the future releases. + */ +struct GFluidOutputRois +{ + std::vector rois; +}; + +/** + * @brief This structure forces Fluid backend to generate multiple + * parallel output regions in the graph. These regions execute in parallel. + * + * This feature may be deprecated in the future releases. + */ +struct GFluidParallelOutputRois +{ + std::vector parallel_rois; +}; + +/** + * @brief This structure allows to customize the way how Fluid executes + * parallel regions. + * + * For example, user can utilize his own threading runtime via this parameter. + * The `parallel_for` member functor is called by the Fluid runtime with the + * following arguments: + * + * @param size Size of the parallel range to process + * @param f A function which should be called for every integer index + * in this range by the specified parallel_for implementation. + * + * This feature may be deprecated in the future releases. + */ +struct GFluidParallelFor +{ + //this function accepts: + // - size of the "parallel" range as the first argument + // - and a function to be called on the range items, designated by item index + std::function)> parallel_for; +}; +/** @} gapi_compile_args */ + +namespace detail +{ +template<> struct CompileArgTag +{ + static const char* tag() { return "gapi.fluid.outputRois"; } +}; + +template<> struct CompileArgTag +{ + static const char* tag() { return "gapi.fluid.parallelFor"; } +}; + +template<> struct CompileArgTag +{ + static const char* tag() { return "gapi.fluid.parallelOutputRois"; } +}; + +} // namespace detail + +namespace detail +{ +template struct fluid_get_in; +template<> struct fluid_get_in +{ + static const cv::gapi::fluid::View& get(const cv::GArgs &in_args, int idx) + { + return *in_args[idx].unsafe_get(); + } +}; + +template<> struct fluid_get_in +{ + // FIXME: change to return by reference when moved to own::Scalar + static const cv::Scalar get(const cv::GArgs &in_args, int idx) + { + return in_args[idx].unsafe_get(); + } +}; + +template struct fluid_get_in> +{ + static const std::vector& get(const cv::GArgs &in_args, int idx) + { + return in_args.at(idx).unsafe_get().rref(); + } +}; + +template struct fluid_get_in> +{ + static const U& get(const cv::GArgs &in_args, int idx) + { + return in_args.at(idx).unsafe_get().rref(); + } +}; + +template struct fluid_get_in +{ + static const T& get(const cv::GArgs &in_args, int idx) + { + return in_args[idx].unsafe_get(); + } +}; + +template +struct scratch_helper; + +template +struct scratch_helper +{ + // Init + template + static void help_init_impl(const cv::GMetaArgs &metas, + const cv::GArgs &in_args, + gapi::fluid::Buffer &scratch_buf, + detail::Seq) + { + Impl::initScratch(get_in_meta(metas, in_args, IIs)..., scratch_buf); + } + + static void help_init(const cv::GMetaArgs &metas, + const cv::GArgs &in_args, + gapi::fluid::Buffer &b) + { + help_init_impl(metas, in_args, b, typename detail::MkSeq::type()); + } + + // Reset + static void help_reset(gapi::fluid::Buffer &b) + { + Impl::resetScratch(b); + } +}; + +template +struct scratch_helper +{ + static void help_init(const cv::GMetaArgs &, + const cv::GArgs &, + gapi::fluid::Buffer &) + { + GAPI_Assert(false); + } + static void help_reset(gapi::fluid::Buffer &) + { + GAPI_Assert(false); + } +}; + +template struct is_gmat_type +{ + static const constexpr bool value = std::is_same::value; +}; + +template +struct get_border_helper; + +template +struct get_border_helper +{ + template + static gapi::fluid::BorderOpt get_border_impl(const GMetaArgs &metas, + const cv::GArgs &in_args, + cv::detail::Seq) + { + return util::make_optional(Impl::getBorder(cv::detail::get_in_meta(metas, in_args, IIs)...)); + } + + static gapi::fluid::BorderOpt help(const GMetaArgs &metas, + const cv::GArgs &in_args) + { + return get_border_impl(metas, in_args, typename detail::MkSeq::type()); + } +}; + +template +struct get_border_helper +{ + static gapi::fluid::BorderOpt help(const cv::GMetaArgs &, + const cv::GArgs &) + { + return {}; + } +}; + +template +struct get_window_helper; + +template +struct get_window_helper +{ + template + static int get_window_impl(const GMetaArgs &metas, + const cv::GArgs &in_args, + cv::detail::Seq) + { + return Impl::getWindow(cv::detail::get_in_meta(metas, in_args, IIs)...); + } + + static int help(const GMetaArgs &metas, const cv::GArgs &in_args) + { + return get_window_impl(metas, in_args, typename detail::MkSeq::type()); + } +}; + +template +struct get_window_helper +{ + static int help(const cv::GMetaArgs &, + const cv::GArgs &) + { + return Impl::Window; + } +}; + +template +struct has_Window +{ +private: + template + static constexpr auto Check(U*) -> typename std::is_same::type; + + template + static constexpr std::false_type Check(...); + + typedef decltype(Check(0)) Result; + +public: + static constexpr bool value = Result::value; +}; + +template +struct callCustomGetBorder; + +template +struct callCustomGetBorder +{ + static constexpr bool value = (Impl::Window != 1); +}; + +template +struct callCustomGetBorder +{ + static constexpr bool value = true; +}; + +template +struct FluidCallHelper; + +template +struct FluidCallHelper, std::tuple, UseScratch> +{ + static_assert(all_satisfy::value, "return type must be GMat"); + static_assert(contains::value, "input must contain at least one GMat"); + + // Execution dispatcher //////////////////////////////////////////////////// + template + static void call_impl(const cv::GArgs &in_args, + const std::vector &out_bufs, + detail::Seq, + detail::Seq) + { + Impl::run(fluid_get_in::get(in_args, IIs)..., *out_bufs[OIs]...); + } + + static void call(const cv::GArgs &in_args, + const std::vector &out_bufs) + { + constexpr int numOuts = (sizeof...(Outs)) + (UseScratch ? 1 : 0); + call_impl(in_args, out_bufs, + typename detail::MkSeq::type(), + typename detail::MkSeq::type()); + } + + // Scratch buffer initialization dispatcher //////////////////////////////// + static void init_scratch(const GMetaArgs &metas, + const cv::GArgs &in_args, + gapi::fluid::Buffer &b) + { + scratch_helper::help_init(metas, in_args, b); + } + + // Scratch buffer reset dispatcher ///////////////////////////////////////// + static void reset_scratch(gapi::fluid::Buffer &scratch_buf) + { + scratch_helper::help_reset(scratch_buf); + } + + static gapi::fluid::BorderOpt getBorder(const GMetaArgs &metas, const cv::GArgs &in_args) + { + constexpr bool hasWindow = has_Window::value; + + // User must provide "init" callback if Window != 1 + // TODO: move to constexpr if when we enable C++17 + return get_border_helper::value, Impl, Ins...>::help(metas, in_args); + } + + static int getWindow(const GMetaArgs &metas, const cv::GArgs &in_args) + { + constexpr bool callCustomGetWindow = !(has_Window::value); + return get_window_helper::help(metas, in_args); + } +}; +} // namespace detail + + +template +class GFluidKernelImpl : public cv::detail::KernelTag +{ + static const int LPI = 1; + static const auto Kind = GFluidKernel::Kind::Filter; + using P = detail::FluidCallHelper; + +public: + using API = K; + + static GFluidKernel kernel() + { + // FIXME: call() and getOutMeta() needs to be renamed so it is clear these + // functions are internal wrappers, not user API + return GFluidKernel(Impl::Kind, Impl::LPI, + UseScratch, + &P::call, &P::init_scratch, &P::reset_scratch, &P::getBorder, &P::getWindow); + } + + static cv::gapi::GBackend backend() { return cv::gapi::fluid::backend(); } +}; + +#define GAPI_FLUID_KERNEL(Name, API, Scratch) struct Name: public cv::GFluidKernelImpl + +} // namespace cv + +#endif // OPENCV_GAPI_GCPUKERNEL_HPP diff --git a/IPL/include/opencv/opencv2/gapi/fluid/imgproc.hpp b/IPL/include/opencv/opencv2/gapi/fluid/imgproc.hpp new file mode 100644 index 0000000..dedfa9d --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/fluid/imgproc.hpp @@ -0,0 +1,20 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_FLUID_IMGPROC_HPP +#define OPENCV_GAPI_FLUID_IMGPROC_HPP + +#include // GKernelPackage +#include // GAPI_EXPORTS + +namespace cv { namespace gapi { namespace imgproc { namespace fluid { + +GAPI_EXPORTS GKernelPackage kernels(); + +}}}} + +#endif // OPENCV_GAPI_FLUID_IMGPROC_HPP diff --git a/IPL/include/opencv/opencv2/gapi/garg.hpp b/IPL/include/opencv/opencv2/gapi/garg.hpp new file mode 100644 index 0000000..5aaea55 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/garg.hpp @@ -0,0 +1,147 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_GARG_HPP +#define OPENCV_GAPI_GARG_HPP + +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace cv { + +class GArg; + +namespace detail { + template + using is_garg = std::is_same::type>; +} + +// Parameter holder class for a node +// Depending on platform capabilities, can either support arbitrary types +// (as `boost::any`) or a limited number of types (as `boot::variant`). +// FIXME: put into "details" as a user shouldn't use it in his code +class GAPI_EXPORTS GArg +{ +public: + GArg() {} + + template::value, int>::type = 0> + explicit GArg(const T &t) + : kind(detail::GTypeTraits::kind) + , value(detail::wrap_gapi_helper::wrap(t)) + { + } + + template::value, int>::type = 0> + explicit GArg(T &&t) + : kind(detail::GTypeTraits::type>::kind) + , value(detail::wrap_gapi_helper::wrap(t)) + { + } + + template inline T& get() + { + return util::any_cast::type>(value); + } + + template inline const T& get() const + { + return util::any_cast::type>(value); + } + + template inline T& unsafe_get() + { + return util::unsafe_any_cast::type>(value); + } + + template inline const T& unsafe_get() const + { + return util::unsafe_any_cast::type>(value); + } + + detail::ArgKind kind = detail::ArgKind::OPAQUE_VAL; + +protected: + util::any value; +}; + +using GArgs = std::vector; + +// FIXME: Express as M::type +// FIXME: Move to a separate file! +using GRunArg = util::variant< +#if !defined(GAPI_STANDALONE) + cv::Mat, + cv::UMat, +#endif // !defined(GAPI_STANDALONE) + cv::gapi::wip::IStreamSource::Ptr, + cv::gapi::own::Mat, + cv::Scalar, + cv::detail::VectorRef, + cv::detail::OpaqueRef + >; +using GRunArgs = std::vector; + +namespace gapi +{ +namespace wip +{ +/** + * @brief This aggregate type represents all types which G-API can handle (via variant). + * + * It only exists to overcome C++ language limitations (where a `using`-defined class can't be forward-declared). + */ +struct Data: public GRunArg +{ + using GRunArg::GRunArg; + template + Data& operator= (const T& t) { GRunArg::operator=(t); return *this; } + template + Data& operator= (T&& t) { GRunArg::operator=(std::move(t)); return *this; } +}; +} // namespace wip +} // namespace gapi + +using GRunArgP = util::variant< +#if !defined(GAPI_STANDALONE) + cv::Mat*, + cv::UMat*, +#endif // !defined(GAPI_STANDALONE) + cv::gapi::own::Mat*, + cv::Scalar*, + cv::detail::VectorRef, + cv::detail::OpaqueRef + >; +using GRunArgsP = std::vector; + +template inline GRunArgs gin(const Ts&... args) +{ + return GRunArgs{ GRunArg(detail::wrap_host_helper::wrap_in(args))... }; +} + +template inline GRunArgsP gout(Ts&... args) +{ + return GRunArgsP{ GRunArgP(detail::wrap_host_helper::wrap_out(args))... }; +} + +} // namespace cv + +#endif // OPENCV_GAPI_GARG_HPP diff --git a/IPL/include/opencv/opencv2/gapi/garray.hpp b/IPL/include/opencv/opencv2/gapi/garray.hpp new file mode 100644 index 0000000..35b1f00 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/garray.hpp @@ -0,0 +1,324 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_GARRAY_HPP +#define OPENCV_GAPI_GARRAY_HPP + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include // flatten_g only! +#include // flatten_g only! + +namespace cv +{ +// Forward declaration; GNode and GOrigin are an internal +// (user-inaccessible) classes. +class GNode; +struct GOrigin; +template class GArray; + +/** + * \addtogroup gapi_meta_args + * @{ + */ +struct GArrayDesc +{ + // FIXME: Body + // FIXME: Also implement proper operator== then + bool operator== (const GArrayDesc&) const { return true; } +}; +template GArrayDesc descr_of(const std::vector &) { return {};} +static inline GArrayDesc empty_array_desc() {return {}; } +/** @} */ + +std::ostream& operator<<(std::ostream& os, const cv::GArrayDesc &desc); + +namespace detail +{ + // ConstructVec is a callback which stores information about T and is used by + // G-API runtime to construct arrays in host memory (T remains opaque for G-API). + // ConstructVec is carried into G-API internals by GArrayU. + // Currently it is suitable for Host (CPU) plugins only, real offload may require + // more information for manual memory allocation on-device. + class VectorRef; + using ConstructVec = std::function; + + // This is the base struct for GArrayU type holder + struct TypeHintBase{virtual ~TypeHintBase() = default;}; + + // This class holds type of initial GArray to be checked from GArrayU + template + struct TypeHint final : public TypeHintBase{}; + + // This class strips type information from GArray and makes it usable + // in the G-API graph compiler (expression unrolling, graph generation, etc). + // Part of GProtoArg. + class GAPI_EXPORTS GArrayU + { + public: + GArrayU(const GNode &n, std::size_t out); // Operation result constructor + + template + bool holds() const; // Check if was created from GArray + + GOrigin& priv(); // Internal use only + const GOrigin& priv() const; // Internal use only + + protected: + GArrayU(); // Default constructor + template friend class cv::GArray; // (available to GArray only) + + void setConstructFcn(ConstructVec &&cv); // Store T-aware constructor + + template + void specifyType(); // Store type of initial GArray + + std::shared_ptr m_priv; + std::shared_ptr m_hint; + }; + + template + bool GArrayU::holds() const{ + GAPI_Assert(m_hint != nullptr); + using U = typename std::decay::type; + return dynamic_cast*>(m_hint.get()) != nullptr; + }; + + template + void GArrayU::specifyType(){ + m_hint.reset(new TypeHint::type>); + }; + + // This class represents a typed STL vector reference. + // Depending on origins, this reference may be either "just a" reference to + // an object created externally, OR actually own the underlying object + // (be value holder). + class BasicVectorRef + { + public: + std::size_t m_elemSize = 0ul; + cv::GArrayDesc m_desc; + virtual ~BasicVectorRef() {} + + virtual void mov(BasicVectorRef &ref) = 0; + virtual const void* ptr() const = 0; + }; + + template class VectorRefT final: public BasicVectorRef + { + using empty_t = util::monostate; + using ro_ext_t = const std::vector *; + using rw_ext_t = std::vector *; + using rw_own_t = std::vector ; + util::variant m_ref; + + inline bool isEmpty() const { return util::holds_alternative(m_ref); } + inline bool isROExt() const { return util::holds_alternative(m_ref); } + inline bool isRWExt() const { return util::holds_alternative(m_ref); } + inline bool isRWOwn() const { return util::holds_alternative(m_ref); } + + void init(const std::vector* vec = nullptr) + { + m_elemSize = sizeof(T); + if (vec) m_desc = cv::descr_of(*vec); + } + + public: + VectorRefT() { init(); } + virtual ~VectorRefT() {} + + explicit VectorRefT(const std::vector& vec) : m_ref(&vec) { init(&vec); } + explicit VectorRefT(std::vector& vec) : m_ref(&vec) { init(&vec); } + explicit VectorRefT(std::vector&& vec) : m_ref(std::move(vec)) { init(&vec); } + + // Reset a VectorRefT. Called only for objects instantiated + // internally in G-API (e.g. temporary GArray's within a + // computation). Reset here means both initialization + // (creating an object) and reset (discarding its existing + // content before the next execution). Must never be called + // for external VectorRefTs. + void reset() + { + if (isEmpty()) + { + std::vector empty_vector; + m_desc = cv::descr_of(empty_vector); + m_ref = std::move(empty_vector); + GAPI_Assert(isRWOwn()); + } + else if (isRWOwn()) + { + util::get(m_ref).clear(); + } + else GAPI_Assert(false); // shouldn't be called in *EXT modes + } + + // Obtain a WRITE reference to underlying object + // Used by CPU kernel API wrappers when a kernel execution frame + // is created + std::vector& wref() + { + GAPI_Assert(isRWExt() || isRWOwn()); + if (isRWExt()) return *util::get(m_ref); + if (isRWOwn()) return util::get(m_ref); + util::throw_error(std::logic_error("Impossible happened")); + } + + // Obtain a READ reference to underlying object + // Used by CPU kernel API wrappers when a kernel execution frame + // is created + const std::vector& rref() const + { + // ANY vector can be accessed for reading, even if it declared for + // output. Example -- a GComputation from [in] to [out1,out2] + // where [out2] is a result of operation applied to [out1]: + // + // GComputation boundary + // . . . . . . . + // . . + // [in] ----> foo() ----> [out1] + // . . : + // . . . .:. . . + // . V . + // . bar() ---> [out2] + // . . . . . . . . . . . . + // + if (isROExt()) return *util::get(m_ref); + if (isRWExt()) return *util::get(m_ref); + if (isRWOwn()) return util::get(m_ref); + util::throw_error(std::logic_error("Impossible happened")); + } + + virtual void mov(BasicVectorRef &v) override { + VectorRefT *tv = dynamic_cast*>(&v); + GAPI_Assert(tv != nullptr); + wref() = std::move(tv->wref()); + } + + virtual const void* ptr() const override { return &rref(); } + }; + + // This class strips type information from VectorRefT<> and makes it usable + // in the G-API executables (carrying run-time data/information to kernels). + // Part of GRunArg. + // Its methods are typed proxies to VectorRefT. + // VectorRef maintains "reference" semantics so two copies of VectoRef refer + // to the same underlying object. + // FIXME: Put a good explanation on why cv::OutputArray doesn't fit this role + class VectorRef + { + std::shared_ptr m_ref; + + template inline void check() const + { + GAPI_DbgAssert(dynamic_cast*>(m_ref.get()) != nullptr); + GAPI_Assert(sizeof(T) == m_ref->m_elemSize); + } + + public: + VectorRef() = default; + template explicit VectorRef(const std::vector& vec) : m_ref(new VectorRefT(vec)) {} + template explicit VectorRef(std::vector& vec) : m_ref(new VectorRefT(vec)) {} + template explicit VectorRef(std::vector&& vec) : m_ref(new VectorRefT(vec)) {} + + template void reset() + { + if (!m_ref) m_ref.reset(new VectorRefT()); + + check(); + static_cast&>(*m_ref).reset(); + } + + template std::vector& wref() + { + check(); + return static_cast&>(*m_ref).wref(); + } + + template const std::vector& rref() const + { + check(); + return static_cast&>(*m_ref).rref(); + } + + void mov(VectorRef &v) + { + m_ref->mov(*v.m_ref); + } + + cv::GArrayDesc descr_of() const + { + return m_ref->m_desc; + } + + // May be used to uniquely identify this object internally + const void *ptr() const { return m_ref->ptr(); } + }; + + // Helper (FIXME: work-around?) + // stripping G types to their host types + // like cv::GArray would still map to std::vector + // but not to std::vector +#if defined(GAPI_STANDALONE) +# define FLATTEN_NS cv::gapi::own +#else +# define FLATTEN_NS cv +#endif + template struct flatten_g; + template<> struct flatten_g { using type = FLATTEN_NS::Mat; }; + template<> struct flatten_g { using type = FLATTEN_NS::Scalar; }; + template struct flatten_g { using type = T; }; +#undef FLATTEN_NS + // FIXME: the above mainly duplicates "ProtoToParam" thing from gtyped.hpp + // but I decided not to include gtyped here - probably worth moving that stuff + // to some common place? (DM) +} // namespace detail + +/** \addtogroup gapi_data_objects + * @{ + */ + +template class GArray +{ +public: + GArray() { putDetails(); } // Empty constructor + explicit GArray(detail::GArrayU &&ref) // GArrayU-based constructor + : m_ref(ref) { putDetails(); } // (used by GCall, not for users) + + detail::GArrayU strip() const { return m_ref; } + +private: + // Host type (or Flat type) - the type this GArray is actually + // specified to. + using HT = typename detail::flatten_g::type>::type; + + static void VCTor(detail::VectorRef& vref) { + vref.reset(); + } + void putDetails() { + m_ref.setConstructFcn(&VCTor); + m_ref.specifyType(); + } + + detail::GArrayU m_ref; +}; + +/** @} */ + +} // namespace cv + +#endif // OPENCV_GAPI_GARRAY_HPP diff --git a/IPL/include/opencv/opencv2/gapi/gasync_context.hpp b/IPL/include/opencv/opencv2/gapi/gasync_context.hpp new file mode 100644 index 0000000..69ce530 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/gasync_context.hpp @@ -0,0 +1,56 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2019 Intel Corporation + +#ifndef OPENCV_GAPI_GASYNC_CONTEXT_HPP +#define OPENCV_GAPI_GASYNC_CONTEXT_HPP + +#if !defined(GAPI_STANDALONE) +# include +#else // Without OpenCV +# include +#endif // !defined(GAPI_STANDALONE) + +#include + +namespace cv { +namespace gapi{ +namespace wip { + +/** + * @brief A class to group async requests to cancel them in a single shot. + * + * GAsyncContext is passed as an argument to async() and async_apply() functions + */ + +class GAPI_EXPORTS GAsyncContext{ + std::atomic cancelation_requested = {false}; +public: + /** + * @brief Start cancellation process for an associated request. + * + * User still has to wait for each individual request (either via callback or according std::future object) to make sure it actually canceled. + * + * @return true if it was a first request to cancel the context + */ + bool cancel(); + + /** + * @brief Returns true if cancellation was requested for this context. + * + * @return true if cancellation was requested for this context + */ + bool isCanceled() const; +}; + +class GAPI_EXPORTS GAsyncCanceled : public std::exception { +public: + virtual const char* what() const noexcept CV_OVERRIDE; +}; +} // namespace wip +} // namespace gapi +} // namespace cv + +#endif //OPENCV_GAPI_GASYNC_CONTEXT_HPP diff --git a/IPL/include/opencv/opencv2/gapi/gcall.hpp b/IPL/include/opencv/opencv2/gapi/gcall.hpp new file mode 100644 index 0000000..ed5ba5f --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/gcall.hpp @@ -0,0 +1,71 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_GCALL_HPP +#define OPENCV_GAPI_GCALL_HPP + +#include // GArg +#include // GMat +#include // GScalar +#include // GArray +#include // GOpaque + +namespace cv { + +struct GKernel; + +// The whole idea of this class is to represent an operation +// which is applied to arguments. This is part of public API, +// since it is what users should use to define kernel interfaces. + +class GAPI_EXPORTS GCall final +{ +public: + class Priv; + + explicit GCall(const GKernel &k); + ~GCall(); + + template + GCall& pass(Ts&&... args) + { + setArgs({cv::GArg(std::move(args))...}); + return *this; + } + + // A generic yield method - obtain a link to operator's particular GMat output + GMat yield (int output = 0); + GMatP yieldP (int output = 0); + GScalar yieldScalar(int output = 0); + + template GArray yieldArray(int output = 0) + { + return GArray(yieldArray(output)); + } + + template GOpaque yieldOpaque(int output = 0) + { + return GOpaque(yieldOpaque(output)); + } + + // Internal use only + Priv& priv(); + const Priv& priv() const; + +protected: + std::shared_ptr m_priv; + + void setArgs(std::vector &&args); + + // Public versions return a typed array or opaque, those are implementation details + detail::GArrayU yieldArray(int output = 0); + detail::GOpaqueU yieldOpaque(int output = 0); +}; + +} // namespace cv + +#endif // OPENCV_GAPI_GCALL_HPP diff --git a/IPL/include/opencv/opencv2/gapi/gcommon.hpp b/IPL/include/opencv/opencv2/gapi/gcommon.hpp new file mode 100644 index 0000000..9ee75f7 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/gcommon.hpp @@ -0,0 +1,169 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_GCOMMON_HPP +#define OPENCV_GAPI_GCOMMON_HPP + +#include // std::hash +#include // std::vector +#include // decay + +#include + +#include +#include +#include + +namespace cv { + +namespace detail +{ + // This is a trait-like structure to mark backend-specific compile arguments + // with tags + template struct CompileArgTag; + + // These structures are tags which separate kernels and transformations + struct KernelTag + {}; + struct TransformTag + {}; +} + +// This definition is here because it is reused by both public(?) and internal +// modules. Keeping it here wouldn't expose public details (e.g., API-level) +// to components which are internal and operate on a lower-level entities +// (e.g., compiler, backends). +// FIXME: merge with ArgKind? +// FIXME: replace with variant[format desc]? +enum class GShape: int +{ + GMAT, + GSCALAR, + GARRAY, + GOPAQUE, +}; + +struct GCompileArg; + +namespace detail { + template + using is_compile_arg = std::is_same::type>; +} +// CompileArg is an unified interface over backend-specific compilation +// information +// FIXME: Move to a separate file? +/** \addtogroup gapi_compile_args + * @{ + * + * @brief Compilation arguments: data structures controlling the + * compilation process + * + * G-API comes with a number of graph compilation options which can be + * passed to cv::GComputation::apply() or + * cv::GComputation::compile(). Known compilation options are listed + * in this page, while extra backends may introduce their own + * compilation options (G-API transparently accepts _everything_ which + * can be passed to cv::compile_args(), it depends on underlying + * backends if an option would be interpreted or not). + * + * For example, if an example computation is executed like this: + * + * @snippet modules/gapi/samples/api_ref_snippets.cpp graph_decl_apply + * + * Extra parameter specifying which kernels to compile with can be + * passed like this: + * + * @snippet modules/gapi/samples/api_ref_snippets.cpp apply_with_param + */ + +/** + * @brief Represents an arbitrary compilation argument. + * + * Any value can be wrapped into cv::GCompileArg, but only known ones + * (to G-API or its backends) can be interpreted correctly. + * + * Normally objects of this class shouldn't be created manually, use + * cv::compile_args() function which automatically wraps everything + * passed in (a variadic template parameter pack) into a vector of + * cv::GCompileArg objects. + */ +struct GAPI_EXPORTS GCompileArg +{ +public: + std::string tag; + + // FIXME: use decay in GArg/other trait-based wrapper before leg is shot! + template::value, int>::type = 0> + explicit GCompileArg(T &&t) + : tag(detail::CompileArgTag::type>::tag()) + , arg(t) + { + } + + template T& get() + { + return util::any_cast(arg); + } + + template const T& get() const + { + return util::any_cast(arg); + } + +private: + util::any arg; +}; + +using GCompileArgs = std::vector; + +/** + * Wraps a list of arguments (a parameter pack) into a vector of + * compilation arguments (cv::GCompileArg). + */ +template GCompileArgs compile_args(Ts&&... args) +{ + return GCompileArgs{ GCompileArg(args)... }; +} + +/** + * @brief Ask G-API to dump compiled graph in Graphviz format under + * the given file name. + * + * Specifies a graph dump path (path to .dot file to be generated). + * G-API will dump a .dot file under specified path during a + * compilation process if this flag is passed. + */ +struct graph_dump_path +{ + std::string m_dump_path; +}; +/** @} */ + +namespace detail +{ + template<> struct CompileArgTag + { + static const char* tag() { return "gapi.graph_dump_path"; } + }; +} + +} // namespace cv + +// std::hash overload for GShape +namespace std +{ +template<> struct hash +{ + size_t operator() (cv::GShape sh) const + { + return std::hash()(static_cast(sh)); + } +}; +} // namespace std + + +#endif // OPENCV_GAPI_GCOMMON_HPP diff --git a/IPL/include/opencv/opencv2/gapi/gcompiled.hpp b/IPL/include/opencv/opencv2/gapi/gcompiled.hpp new file mode 100644 index 0000000..b08451a --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/gcompiled.hpp @@ -0,0 +1,219 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_GCOMPILED_HPP +#define OPENCV_GAPI_GCOMPILED_HPP + +#include + +#include +#include +#include + +namespace cv { + +// This class represents a compiled computation. +// In theory (and ideally), it can be used w/o the rest of APIs. +// In theory (and ideally), it can be serialized/deserialized. +// It can enable scenarious like deployment to an autonomous devince, FuSa, etc. +// +// Currently GCompiled assumes all GMats you used to pass data to G-API +// are valid and not destroyed while you use a GCompiled object. +// +// FIXME: In future, there should be a way to name I/O objects and specify it +// to GCompiled externally (for example, when it is loaded on the target system). + +/** + * \addtogroup gapi_main_classes + * @{ + */ +/** + * @brief Represents a compiled computation (graph). Can only be used + * with image / data formats & resolutions it was compiled for, with + * some exceptions. + * + * This class represents a product of graph compilation (calling + * cv::GComputation::compile()). Objects of this class actually do + * data processing, and graph execution is incapsulated into objects + * of this class. Execution model itself depends on kernels and + * backends which were using during the compilation, see @ref + * gapi_compile_args for details. + * + * In a general case, GCompiled objects can be applied to data only in + * that formats/resolutions they were compiled for (see @ref + * gapi_meta_args). However, if the underlying backends allow, a + * compiled object can be _reshaped_ to handle data (images) of + * different resolution, though formats and types must remain the same. + * + * GCompiled is very similar to `std::function<>` in its semantics -- + * running it looks like a function call in the user code. + * + * At the moment, GCompiled objects are not reentrant -- generally, + * the objects are stateful since graph execution itself is a stateful + * process and this state is now maintained in GCompiled's own memory + * (not on the process stack). + * + * At the same time, two different GCompiled objects produced from the + * single cv::GComputation are completely independent and can be used + * concurrently. + * + * @sa GStreamingCompiled + */ +class GAPI_EXPORTS GCompiled +{ +public: + /// @private + class GAPI_EXPORTS Priv; + + /** + * @brief Constructs an empty object + */ + GCompiled(); + + /** + * @brief Run the compiled computation, a generic version. + * + * @param ins vector of inputs to process. + * @param outs vector of outputs to produce. + * + * Input/output vectors must have the same number of elements as + * defined in the cv::GComputation protocol (at the moment of its + * construction). Shapes of elements also must conform to protocol + * (e.g. cv::Mat needs to be passed where cv::GMat has been + * declared as input, and so on). Run-time exception is generated + * otherwise. + * + * Objects in output vector may remain empty (like cv::Mat) -- + * G-API will automatically initialize output objects to proper formats. + * + * @note Don't construct GRunArgs/GRunArgsP objects manually, use + * cv::gin()/cv::gout() wrappers instead. + */ + void operator() (GRunArgs &&ins, GRunArgsP &&outs); // Generic arg-to-arg +#if !defined(GAPI_STANDALONE) + + /** + * @brief Execute an unary computation + * + * @overload + * @param in input cv::Mat for unary computation + * @param out output cv::Mat for unary computation + * process. + */ + void operator() (cv::Mat in, cv::Mat &out); // Unary overload + + /** + * @brief Execute an unary computation + * + * @overload + * @param in input cv::Mat for unary computation + * @param out output cv::Scalar for unary computation + * process. + */ + void operator() (cv::Mat in, cv::Scalar &out); // Unary overload (scalar) + + /** + * @brief Execute a binary computation + * + * @overload + * @param in1 first input cv::Mat for binary computation + * @param in2 second input cv::Mat for binary computation + * @param out output cv::Mat for binary computation + * process. + */ + void operator() (cv::Mat in1, cv::Mat in2, cv::Mat &out); // Binary overload + + /** + * @brief Execute an binary computation + * + * @overload + * @param in1 first input cv::Mat for binary computation + * @param in2 second input cv::Mat for binary computation + * @param out output cv::Scalar for binary computation + * process. + */ + void operator() (cv::Mat in1, cv::Mat in2, cv::Scalar &out); // Binary overload (scalar) + + /** + * @brief Execute a computation with arbitrary number of + * inputs/outputs. + * + * @overload + * @param ins vector of input cv::Mat objects to process by the + * computation. + * @param outs vector of output cv::Mat objects to produce by the + * computation. + * + * Numbers of elements in ins/outs vectors must match numbers of + * inputs/outputs which were used to define the source GComputation. + */ + void operator() (const std::vector &ins, // Compatibility overload + const std::vector &outs); +#endif // !defined(GAPI_STANDALONE) + /// @private + Priv& priv(); + + /** + * @brief Check if compiled object is valid (non-empty) + * + * @return true if the object is runnable (valid), false otherwise + */ + explicit operator bool () const; + + /** + * @brief Vector of metadata this graph was compiled for. + * + * @return Unless _reshape_ is not supported, return value is the + * same vector which was passed to cv::GComputation::compile() to + * produce this compiled object. Otherwise, it is the latest + * metadata vector passed to reshape() (if that call was + * successful). + */ + const GMetaArgs& metas() const; // Meta passed to compile() + + /** + * @brief Vector of metadata descriptions of graph outputs + * + * @return vector with formats/resolutions of graph's output + * objects, auto-inferred from input metadata vector by + * operations which form this computation. + * + * @note GCompiled objects produced from the same + * cv::GComputiation graph with different input metas may return + * different values in this vector. + */ + const GMetaArgs& outMetas() const; + + /** + * @brief Check if the underlying backends support reshape or not. + * + * @return true if supported, false otherwise. + */ + bool canReshape() const; + + /** + * @brief Reshape a compiled graph to support new image + * resolutions. + * + * Throws an exception if an error occurs. + * + * @param inMetas new metadata to reshape on. Vector size and + * metadata shapes must match the computation's protocol. + * @param args compilation arguments to use. + */ + // FIXME: Why it requires compile args? + void reshape(const GMetaArgs& inMetas, const GCompileArgs& args); + +protected: + /// @private + std::shared_ptr m_priv; +}; +/** @} */ + +} + +#endif // OPENCV_GAPI_GCOMPILED_HPP diff --git a/IPL/include/opencv/opencv2/gapi/gcompiled_async.hpp b/IPL/include/opencv/opencv2/gapi/gcompiled_async.hpp new file mode 100644 index 0000000..a0c2917 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/gcompiled_async.hpp @@ -0,0 +1,73 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2019 Intel Corporation + + +#ifndef OPENCV_GAPI_GCOMPILED_ASYNC_HPP +#define OPENCV_GAPI_GCOMPILED_ASYNC_HPP + +#include //for std::future +#include //for std::exception_ptr +#include //for std::function +#include +#include + +namespace cv { + //fwd declaration + class GCompiled; + +namespace gapi{ +namespace wip { + class GAsyncContext; + /** + These functions asynchronously (i.e. probably on a separate thread of execution) call GCompiled::operator() member function of their first argument with copies of rest of arguments (except callback) passed in. + The difference between the function is the way to get the completion notification (via callback or a waiting on std::future object) + If exception is occurred during execution of apply it is transferred to the callback (via function parameter) or passed to future (and will be thrown on call to std::future::get) + + N.B. : + Input arguments are copied on call to async function (actually on call to cv::gin) and thus do not have to outlive the actual completion of asynchronous activity. + While output arguments are "captured" by reference(pointer) and therefore _must_ outlive the asynchronous activity + (i.e. live at least until callback is called or future is unblocked) + + @param gcmpld Compiled computation (graph) to start asynchronously + @param callback Callback to be called when execution of gcmpld is done + @param ins Input parameters for gcmpld + @param outs Output parameters for gcmpld + */ + GAPI_EXPORTS void async(GCompiled& gcmpld, std::function&& callback, GRunArgs &&ins, GRunArgsP &&outs); + + /** @overload + @param gcmpld Compiled computation (graph) to run asynchronously + @param callback Callback to be called when execution of gcmpld is done + @param ins Input parameters for gcmpld + @param outs Output parameters for gcmpld + @param ctx Context this request belongs to + @see async GAsyncContext + */ + GAPI_EXPORTS void async(GCompiled& gcmpld, std::function&& callback, GRunArgs &&ins, GRunArgsP &&outs, GAsyncContext& ctx); + + /** @overload + @param gcmpld Compiled computation (graph) to run asynchronously + @param ins Input parameters for gcmpld + @param outs Output parameters for gcmpld + @return std::future object to wait for completion of async operation + @see async + */ + GAPI_EXPORTS std::future async(GCompiled& gcmpld, GRunArgs &&ins, GRunArgsP &&outs); + + /** + @param gcmpld Compiled computation (graph) to run asynchronously + @param ins Input parameters for gcmpld + @param outs Output parameters for gcmpld + @param ctx Context this request belongs to + @return std::future object to wait for completion of async operation + @see async GAsyncContext + */ + GAPI_EXPORTS std::future async(GCompiled& gcmpld, GRunArgs &&ins, GRunArgsP &&outs, GAsyncContext& ctx); +} // namespace wip +} // namespace gapi +} // namespace cv + +#endif // OPENCV_GAPI_GCOMPILED_ASYNC_HPP diff --git a/IPL/include/opencv/opencv2/gapi/gcompoundkernel.hpp b/IPL/include/opencv/opencv2/gapi/gcompoundkernel.hpp new file mode 100644 index 0000000..2f17064 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/gcompoundkernel.hpp @@ -0,0 +1,129 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018-2019 Intel Corporation + + +#ifndef OPENCV_GAPI_GCOMPOUNDKERNEL_HPP +#define OPENCV_GAPI_GCOMPOUNDKERNEL_HPP + +#include +#include +#include +#include + +namespace cv { +namespace gapi +{ +namespace compound +{ + // FIXME User does not need to know about this function + // Needs that user may define compound kernels(as cpu kernels) + GAPI_EXPORTS cv::gapi::GBackend backend(); +} // namespace compound +} // namespace gapi + +namespace detail +{ + +struct GCompoundContext +{ + explicit GCompoundContext(const GArgs& in_args); + template + const T& inArg(int input) { return m_args.at(input).get(); } + + GArgs m_args; + GArgs m_results; +}; + +class GAPI_EXPORTS GCompoundKernel +{ +// Compound kernel must use all of it's inputs +public: + using F = std::function; + + explicit GCompoundKernel(const F& f); + void apply(GCompoundContext& ctx); + +protected: + F m_f; +}; + +template struct get_compound_in +{ + static T get(GCompoundContext &ctx, int idx) { return ctx.inArg(idx); } +}; + +template struct get_compound_in> +{ + static cv::GArray get(GCompoundContext &ctx, int idx) + { + auto array = cv::GArray(); + ctx.m_args[idx] = GArg(array); + return array; + } +}; + +template struct get_compound_in> +{ + static cv::GOpaque get(GCompoundContext &ctx, int idx) + { + auto opaq = cv::GOpaque(); + ctx.m_args[idx] = GArg(opaq); + return opaq; + } +}; + +template +struct GCompoundCallHelper; + +template +struct GCompoundCallHelper, std::tuple > +{ + template + static void expand_impl(GCompoundContext &ctx, detail::Seq, detail::Seq) + { + auto result = Impl::expand(get_compound_in::get(ctx, IIs)...); + auto tuple_return = tuple_wrap_helper::get(std::move(result)); + ctx.m_results = { cv::GArg(std::get(tuple_return))... }; + } + + static void expand(GCompoundContext &ctx) + { + expand_impl(ctx, + typename detail::MkSeq::type(), + typename detail::MkSeq::type()); + } +}; + +template +class GCompoundKernelImpl: public cv::detail::GCompoundCallHelper, + public cv::detail::KernelTag +{ + using P = cv::detail::GCompoundCallHelper; + +public: + using API = K; + + static cv::gapi::GBackend backend() { return cv::gapi::compound::backend(); } + static GCompoundKernel kernel() { return GCompoundKernel(&P::expand); } +}; + +} // namespace detail + + +/** + * Declares a new compound kernel. See this + * [documentation chapter](@ref gapi_kernel_compound) + * on compound kernels for more details. + * + * @param Name type name for new kernel + * @param API the interface this kernel implements + */ +#define GAPI_COMPOUND_KERNEL(Name, API) \ + struct Name: public cv::detail::GCompoundKernelImpl + +} // namespace cv + +#endif // OPENCV_GAPI_GCOMPOUNDKERNEL_HPP diff --git a/IPL/include/opencv/opencv2/gapi/gcomputation.hpp b/IPL/include/opencv/opencv2/gapi/gcomputation.hpp new file mode 100644 index 0000000..1ff874a --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/gcomputation.hpp @@ -0,0 +1,559 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_GCOMPUTATION_HPP +#define OPENCV_GAPI_GCOMPUTATION_HPP + +#include + +#include +#include +#include +#include +#include +#include + +namespace cv { + +namespace detail +{ + // FIXME: move to algorithm, cover with separate tests + // FIXME: replace with O(1) version (both memory and compilation time) + template + struct last_type; + + template + struct last_type { using type = T;}; + + template + struct last_type { using type = typename last_type::type; }; + + template + using last_type_t = typename last_type::type; +} + +/** + * \addtogroup gapi_main_classes + * @{ + * + * @brief G-API classes for constructed and compiled graphs. + */ +/** + * @brief GComputation class represents a captured computation + * graph. GComputation objects form boundaries for expression code + * user writes with G-API, allowing to compile and execute it. + * + * G-API computations are defined with input/output data + * objects. G-API will track automatically which operations connect + * specified outputs to the inputs, forming up a call graph to be + * executed. The below example expresses calculation of Sobel operator + * for edge detection (\f$G = \sqrt{G_x^2 + G_y^2}\f$): + * + * @snippet modules/gapi/samples/api_ref_snippets.cpp graph_def + * + * Full pipeline can be now captured with this object declaration: + * + * @snippet modules/gapi/samples/api_ref_snippets.cpp graph_cap_full + * + * Input/output data objects on which a call graph should be + * reconstructed are passed using special wrappers cv::GIn and + * cv::GOut. G-API will track automatically which operations form a + * path from inputs to outputs and build the execution graph appropriately. + * + * Note that cv::GComputation doesn't take ownership on data objects + * it is defined. Moreover, multiple GComputation objects may be + * defined on the same expressions, e.g. a smaller pipeline which + * expects that image gradients are already pre-calculated may be + * defined like this: + * + * @snippet modules/gapi/samples/api_ref_snippets.cpp graph_cap_sub + * + * The resulting graph would expect two inputs and produce one + * output. In this case, it doesn't matter if gx/gy data objects are + * results of cv::gapi::Sobel operators -- G-API will stop unrolling + * expressions and building the underlying graph one reaching this + * data objects. + * + * The way how GComputation is defined is important as its definition + * specifies graph _protocol_ -- the way how the graph should be + * used. Protocol is defined by number of inputs, number of outputs, + * and shapes of inputs and outputs. + * + * In the above example, sobelEdge expects one Mat on input and + * produces one Mat; while sobelEdgeSub expects two Mats on input and + * produces one Mat. GComputation's protocol defines how other + * computation methods should be used -- cv::GComputation::compile() and + * cv::GComputation::apply(). For example, if a graph is defined on + * two GMat inputs, two cv::Mat objects have to be passed to apply() + * for execution. GComputation checks protocol correctness in runtime + * so passing a different number of objects in apply() or passing + * cv::Scalar instead of cv::Mat there would compile well as a C++ + * source but raise an exception in run-time. G-API also comes with a + * typed wrapper cv::GComputationT<> which introduces this type-checking in + * compile-time. + * + * cv::GComputation itself is a thin object which just captures what + * the graph is. The compiled graph (which actually process data) is + * represented by class GCompiled. Use compile() method to generate a + * compiled graph with given compile options. cv::GComputation can + * also be used to process data with implicit graph compilation + * on-the-fly, see apply() for details. + * + * GComputation is a reference-counted object -- once defined, all its + * copies will refer to the same instance. + * + * @sa GCompiled + */ +class GAPI_EXPORTS GComputation +{ +public: + class Priv; + typedef std::function Generator; + + // Various constructors enable different ways to define a computation: ///// + // 1. Generic constructors + /** + * @brief Define a computation using a generator function. + * + * Graph can be defined in-place directly at the moment of its + * construction with a lambda: + * + * @snippet modules/gapi/samples/api_ref_snippets.cpp graph_gen + * + * This may be useful since all temporary objects (cv::GMats) and + * namespaces can be localized to scope of lambda, without + * contaminating the parent scope with probably unnecessary objects + * and information. + * + * @param gen generator function which returns a cv::GComputation, + * see Generator. + */ + GComputation(const Generator& gen); // Generator + // overload + + /** + * @brief Generic GComputation constructor. + * + * Constructs a new graph with a given protocol, specified as a + * flow of operations connecting input/output objects. Throws if + * the passed boundaries are invalid, e.g. if there's no + * functional dependency (path) between given outputs and inputs. + * + * @param ins Input data vector. + * @param outs Output data vector. + * + * @note Don't construct GProtoInputArgs/GProtoOutputArgs objects + * directly, use cv::GIn()/cv::GOut() wrapper functions instead. + * + * @sa @ref gapi_data_objects + */ + GComputation(GProtoInputArgs &&ins, + GProtoOutputArgs &&outs); // Arg-to-arg overload + + // 2. Syntax sugar and compatibility overloads + /** + * @brief Defines an unary (one input -- one output) computation + * + * @overload + * @param in input GMat of the defined unary computation + * @param out output GMat of the defined unary computation + */ + GComputation(GMat in, GMat out); // Unary overload + + /** + * @brief Defines an unary (one input -- one output) computation + * + * @overload + * @param in input GMat of the defined unary computation + * @param out output GScalar of the defined unary computation + */ + GComputation(GMat in, GScalar out); // Unary overload (scalar) + + /** + * @brief Defines a binary (two inputs -- one output) computation + * + * @overload + * @param in1 first input GMat of the defined binary computation + * @param in2 second input GMat of the defined binary computation + * @param out output GMat of the defined binary computation + */ + GComputation(GMat in1, GMat in2, GMat out); // Binary overload + + /** + * @brief Defines a binary (two inputs -- one output) computation + * + * @overload + * @param in1 first input GMat of the defined binary computation + * @param in2 second input GMat of the defined binary computation + * @param out output GScalar of the defined binary computation + */ + GComputation(GMat in1, GMat in2, GScalar out); // Binary + // overload + // (scalar) + + /** + * @brief Defines a computation with arbitrary input/output number. + * + * @overload + * @param ins vector of inputs GMats for this computation + * @param outs vector of outputs GMats for this computation + * + * Use this overload for cases when number of computation + * inputs/outputs is not known in compile-time -- e.g. when graph + * is programmatically generated to build an image pyramid with + * the given number of levels, etc. + */ + GComputation(const std::vector &ins, // Compatibility overload + const std::vector &outs); + + // Various versions of apply(): //////////////////////////////////////////// + // 1. Generic apply() + /** + * @brief Compile graph on-the-fly and immediately execute it on + * the inputs data vectors. + * + * Number of input/output data objects must match GComputation's + * protocol, also types of host data objects (cv::Mat, cv::Scalar) + * must match the shapes of data objects from protocol (cv::GMat, + * cv::GScalar). If there's a mismatch, a run-time exception will + * be generated. + * + * Internally, a cv::GCompiled object is created for the given + * input format configuration, which then is executed on the input + * data immediately. cv::GComputation caches compiled objects + * produced within apply() -- if this method would be called next + * time with the same input parameters (image formats, image + * resolution, etc), the underlying compiled graph will be reused + * without recompilation. If new metadata doesn't match the cached + * one, the underlying compiled graph is regenerated. + * + * @note compile() always triggers a compilation process and + * produces a new GCompiled object regardless if a similar one has + * been cached via apply() or not. + * + * @param ins vector of input data to process. Don't create + * GRunArgs object manually, use cv::gin() wrapper instead. + * @param outs vector of output data to fill results in. cv::Mat + * objects may be empty in this vector, G-API will automatically + * initialize it with the required format & dimensions. Don't + * create GRunArgsP object manually, use cv::gout() wrapper instead. + * @param args a list of compilation arguments to pass to the + * underlying compilation process. Don't create GCompileArgs + * object manually, use cv::compile_args() wrapper instead. + * + * @sa @ref gapi_data_objects, @ref gapi_compile_args + */ + void apply(GRunArgs &&ins, GRunArgsP &&outs, GCompileArgs &&args = {}); // Arg-to-arg overload + + /// @private -- Exclude this function from OpenCV documentation + void apply(const std::vector& ins, // Compatibility overload + const std::vector& outs, + GCompileArgs &&args = {}); + + // 2. Syntax sugar and compatibility overloads +#if !defined(GAPI_STANDALONE) + /** + * @brief Execute an unary computation (with compilation on the fly) + * + * @overload + * @param in input cv::Mat for unary computation + * @param out output cv::Mat for unary computation + * @param args compilation arguments for underlying compilation + * process. + */ + void apply(cv::Mat in, cv::Mat &out, GCompileArgs &&args = {}); // Unary overload + + /** + * @brief Execute an unary computation (with compilation on the fly) + * + * @overload + * @param in input cv::Mat for unary computation + * @param out output cv::Scalar for unary computation + * @param args compilation arguments for underlying compilation + * process. + */ + void apply(cv::Mat in, cv::Scalar &out, GCompileArgs &&args = {}); // Unary overload (scalar) + + /** + * @brief Execute a binary computation (with compilation on the fly) + * + * @overload + * @param in1 first input cv::Mat for binary computation + * @param in2 second input cv::Mat for binary computation + * @param out output cv::Mat for binary computation + * @param args compilation arguments for underlying compilation + * process. + */ + void apply(cv::Mat in1, cv::Mat in2, cv::Mat &out, GCompileArgs &&args = {}); // Binary overload + + /** + * @brief Execute an binary computation (with compilation on the fly) + * + * @overload + * @param in1 first input cv::Mat for binary computation + * @param in2 second input cv::Mat for binary computation + * @param out output cv::Scalar for binary computation + * @param args compilation arguments for underlying compilation + * process. + */ + void apply(cv::Mat in1, cv::Mat in2, cv::Scalar &out, GCompileArgs &&args = {}); // Binary overload (scalar) + + /** + * @brief Execute a computation with arbitrary number of + * inputs/outputs (with compilation on-the-fly). + * + * @overload + * @param ins vector of input cv::Mat objects to process by the + * computation. + * @param outs vector of output cv::Mat objects to produce by the + * computation. + * @param args compilation arguments for underlying compilation + * process. + * + * Numbers of elements in ins/outs vectors must match numbers of + * inputs/outputs which were used to define this GComputation. + */ + void apply(const std::vector& ins, // Compatibility overload + std::vector& outs, + GCompileArgs &&args = {}); +#endif // !defined(GAPI_STANDALONE) + // Various versions of compile(): ////////////////////////////////////////// + // 1. Generic compile() - requires metas to be passed as vector + /** + * @brief Compile the computation for specific input format(s). + * + * This method triggers compilation process and produces a new + * GCompiled object which then can process data of the given + * format. Passing data with different format to the compiled + * computation will generate a run-time exception. + * + * @param in_metas vector of input metadata configuration. Grab + * metadata from real data objects (like cv::Mat or cv::Scalar) + * using cv::descr_of(), or create it on your own. + * @param args compilation arguments for this compilation + * process. Compilation arguments directly affect what kind of + * executable object would be produced, e.g. which kernels (and + * thus, devices) would be used to execute computation. + * + * @return GCompiled, an executable computation compiled + * specifically for the given input parameters. + * + * @sa @ref gapi_compile_args + */ + GCompiled compile(GMetaArgs &&in_metas, GCompileArgs &&args = {}); + + // 2. Syntax sugar - variadic list of metas, no extra compile args + // FIXME: SFINAE looks ugly in the generated documentation + /** + * @overload + * + * Takes a variadic parameter pack with metadata + * descriptors for which a compiled object needs to be produced. + * + * @return GCompiled, an executable computation compiled + * specifically for the given input parameters. + */ + template + auto compile(const Ts&... metas) -> + typename std::enable_if::value, GCompiled>::type + { + return compile(GMetaArgs{GMetaArg(metas)...}, GCompileArgs()); + } + + // 3. Syntax sugar - variadic list of metas, extra compile args + // (seems optional parameters don't work well when there's an variadic template + // comes first) + // + // Ideally it should look like: + // + // template + // GCompiled compile(const Ts&... metas, GCompileArgs &&args) + // + // But not all compilers can handle this (and seems they shouldn't be able to). + // FIXME: SFINAE looks ugly in the generated documentation + /** + * @overload + * + * Takes a variadic parameter pack with metadata + * descriptors for which a compiled object needs to be produced, + * followed by GCompileArgs object representing compilation + * arguments for this process. + * + * @return GCompiled, an executable computation compiled + * specifically for the given input parameters. + */ + template + auto compile(const Ts&... meta_and_compile_args) -> + typename std::enable_if::value + && std::is_same >::value, + GCompiled>::type + { + //FIXME: wrapping meta_and_compile_args into a tuple to unwrap them inside a helper function is the overkill + return compile(std::make_tuple(meta_and_compile_args...), + typename detail::MkSeq::type()); + } + + + // FIXME: Document properly in the Doxygen format + // Video-oriented pipeline compilation: + // 1. A generic version + /** + * @brief Compile the computation for streaming mode. + * + * This method triggers compilation process and produces a new + * GStreamingCompiled object which then can process video stream + * data of the given format. Passing a stream in a different + * format to the compiled computation will generate a run-time + * exception. + * + * @param in_metas vector of input metadata configuration. Grab + * metadata from real data objects (like cv::Mat or cv::Scalar) + * using cv::descr_of(), or create it on your own. + * + * @param args compilation arguments for this compilation + * process. Compilation arguments directly affect what kind of + * executable object would be produced, e.g. which kernels (and + * thus, devices) would be used to execute computation. + * + * @return GStreamingCompiled, a streaming-oriented executable + * computation compiled specifically for the given input + * parameters. + * + * @sa @ref gapi_compile_args + */ + GStreamingCompiled compileStreaming(GMetaArgs &&in_metas, GCompileArgs &&args = {}); + + /** + * @brief Compile the computation for streaming mode. + * + * This method triggers compilation process and produces a new + * GStreamingCompiled object which then can process video stream + * data in any format. Underlying mechanisms will be adjusted to + * every new input video stream automatically, but please note that + * _not all_ existing backends support this (see reshape()). + * + * @param args compilation arguments for this compilation + * process. Compilation arguments directly affect what kind of + * executable object would be produced, e.g. which kernels (and + * thus, devices) would be used to execute computation. + * + * @return GStreamingCompiled, a streaming-oriented executable + * computation compiled for any input image format. + * + * @sa @ref gapi_compile_args + */ + GStreamingCompiled compileStreaming(GCompileArgs &&args = {}); + + // 2. Direct metadata version + /** + * @overload + * + * Takes a variadic parameter pack with metadata + * descriptors for which a compiled object needs to be produced. + * + * @return GStreamingCompiled, a streaming-oriented executable + * computation compiled specifically for the given input + * parameters. + */ + template + auto compileStreaming(const Ts&... metas) -> + typename std::enable_if::value, GStreamingCompiled>::type + { + return compileStreaming(GMetaArgs{GMetaArg(metas)...}, GCompileArgs()); + } + + // 2. Direct metadata + compile arguments version + /** + * @overload + * + * Takes a variadic parameter pack with metadata + * descriptors for which a compiled object needs to be produced, + * followed by GCompileArgs object representing compilation + * arguments for this process. + * + * @return GStreamingCompiled, a streaming-oriented executable + * computation compiled specifically for the given input + * parameters. + */ + template + auto compileStreaming(const Ts&... meta_and_compile_args) -> + typename std::enable_if::value + && std::is_same >::value, + GStreamingCompiled>::type + { + //FIXME: wrapping meta_and_compile_args into a tuple to unwrap them inside a helper function is the overkill + return compileStreaming(std::make_tuple(meta_and_compile_args...), + typename detail::MkSeq::type()); + } + + // Internal use only + /// @private + Priv& priv(); + /// @private + const Priv& priv() const; + +protected: + + // 4. Helper methods for (3) + /// @private + template + GCompiled compile(const std::tuple &meta_and_compile_args, detail::Seq) + { + GMetaArgs meta_args = {GMetaArg(std::get(meta_and_compile_args))...}; + GCompileArgs comp_args = std::get(meta_and_compile_args); + return compile(std::move(meta_args), std::move(comp_args)); + } + template + GStreamingCompiled compileStreaming(const std::tuple &meta_and_compile_args, detail::Seq) + { + GMetaArgs meta_args = {GMetaArg(std::get(meta_and_compile_args))...}; + GCompileArgs comp_args = std::get(meta_and_compile_args); + return compileStreaming(std::move(meta_args), std::move(comp_args)); + } + /// @private + std::shared_ptr m_priv; +}; +/** @} */ + +namespace gapi +{ + // FIXME: all these standalone functions need to be added to some + // common documentation section + /** + * @brief Define an tagged island (subgraph) within a computation. + * + * Declare an Island tagged with `name` and defined from `ins` to `outs` + * (exclusively, as ins/outs are data objects, and regioning is done on + * operations level). + * Throws if any operation between `ins` and `outs` are already assigned + * to another island. + * + * Islands allow to partition graph into subgraphs, fine-tuning + * the way it is scheduled by the underlying executor. + * + * @param name name of the Island to create + * @param ins vector of input data objects where the subgraph + * begins + * @param outs vector of output data objects where the subgraph + * ends. + * + * The way how an island is defined is similar to how + * cv::GComputation is defined on input/output data objects. + * Same rules apply here as well -- if there's no functional + * dependency between inputs and outputs or there's not enough + * input data objects were specified to properly calculate all + * outputs, an exception is thrown. + * + * Use cv::GIn() / cv::GOut() to specify input/output vectors. + */ + void GAPI_EXPORTS island(const std::string &name, + GProtoInputArgs &&ins, + GProtoOutputArgs &&outs); +} // namespace gapi + +} // namespace cv +#endif // OPENCV_GAPI_GCOMPUTATION_HPP diff --git a/IPL/include/opencv/opencv2/gapi/gcomputation_async.hpp b/IPL/include/opencv/opencv2/gapi/gcomputation_async.hpp new file mode 100644 index 0000000..8af603e --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/gcomputation_async.hpp @@ -0,0 +1,69 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2019 Intel Corporation + +#ifndef OPENCV_GAPI_GCOMPUTATION_ASYNC_HPP +#define OPENCV_GAPI_GCOMPUTATION_ASYNC_HPP + + +#include //for std::future +#include //for std::exception_ptr +#include //for std::function +#include //for GRunArgs, GRunArgsP +#include //for GCompileArgs +#include + + +namespace cv { + //fwd declaration + class GComputation; +namespace gapi { +namespace wip { + class GAsyncContext; + /** In contrast to async() functions, these do call GComputation::apply() member function of the GComputation passed in. + + @param gcomp Computation (graph) to run asynchronously + @param callback Callback to be called when execution of gcomp is done + @param ins Input parameters for gcomp + @param outs Output parameters for gcomp + @param args Compile arguments to pass to GComputation::apply() + @see async + */ + GAPI_EXPORTS void async_apply(GComputation& gcomp, std::function&& callback, GRunArgs &&ins, GRunArgsP &&outs, GCompileArgs &&args = {}); + /** @overload + @param gcomp Computation (graph) to run asynchronously + @param callback Callback to be called when execution of gcomp is done + @param ins Input parameters for gcomp + @param outs Output parameters for gcomp + @param args Compile arguments to pass to GComputation::apply() + @param ctx Context this request belongs to + @see async_apply async GAsyncContext + */ + GAPI_EXPORTS void async_apply(GComputation& gcomp, std::function&& callback, GRunArgs &&ins, GRunArgsP &&outs, GCompileArgs &&args, GAsyncContext& ctx); + /** @overload + @param gcomp Computation (graph) to run asynchronously + @param ins Input parameters for gcomp + @param outs Output parameters for gcomp + @param args Compile arguments to pass to GComputation::apply() + @return std::future object to wait for completion of async operation + @see async_apply async + */ + GAPI_EXPORTS std::future async_apply(GComputation& gcomp, GRunArgs &&ins, GRunArgsP &&outs, GCompileArgs &&args = {}); + /** @overload + @param gcomp Computation (graph) to run asynchronously + @param ins Input parameters for gcomp + @param outs Output parameters for gcomp + @param args Compile arguments to pass to GComputation::apply() + @param ctx Context this request belongs to + @return std::future object to wait for completion of async operation + @see async_apply async GAsyncContext + */ + GAPI_EXPORTS std::future async_apply(GComputation& gcomp, GRunArgs &&ins, GRunArgsP &&outs, GCompileArgs &&args, GAsyncContext& ctx); +} // namespace wip +} // namespace gapi +} // namespace cv + + +#endif //OPENCV_GAPI_GCOMPUTATION_ASYNC_HPP diff --git a/IPL/include/opencv/opencv2/gapi/gkernel.hpp b/IPL/include/opencv/opencv2/gapi/gkernel.hpp new file mode 100644 index 0000000..478d7d3 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/gkernel.hpp @@ -0,0 +1,703 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018-2019 Intel Corporation + + +#ifndef OPENCV_GAPI_GKERNEL_HPP +#define OPENCV_GAPI_GKERNEL_HPP + +#include +#include +#include // string +#include // false_type, true_type +#include // map (for GKernelPackage) +#include // tuple + +#include // CompileArgTag +#include // Seq +#include +#include // GArg +#include // GMetaArg +#include // GTypeTraits +#include //suppress_unused_warning +#include + +namespace cv { + +using GShapes = std::vector; + +// GKernel describes kernel API to the system +// FIXME: add attributes of a kernel, (e.g. number and types +// of inputs, etc) +struct GAPI_EXPORTS GKernel +{ + using M = std::function; + + const std::string name; // kernel ID, defined by its API (signature) + const std::string tag; // some (implementation-specific) tag + const M outMeta; // generic adaptor to API::outMeta(...) + const GShapes outShapes; // types (shapes) kernel's outputs +}; + +// GKernelImpl describes particular kernel implementation to the system +struct GAPI_EXPORTS GKernelImpl +{ + util::any opaque; // backend-specific opaque info +}; + +template class GKernelTypeM; + +namespace detail +{ + //////////////////////////////////////////////////////////////////////////// + // yield() is used in graph construction time as a generic method to obtain + // lazy "return value" of G-API operations + // + namespace + { + template struct Yield; + template<> struct Yield + { + static inline cv::GMat yield(cv::GCall &call, int i) { return call.yield(i); } + }; + template<> struct Yield + { + static inline cv::GMatP yield(cv::GCall &call, int i) { return call.yieldP(i); } + }; + template<> struct Yield + { + static inline cv::GScalar yield(cv::GCall &call, int i) { return call.yieldScalar(i); } + }; + template struct Yield > + { + static inline cv::GArray yield(cv::GCall &call, int i) { return call.yieldArray(i); } + }; + template struct Yield > + { + static inline cv::GOpaque yield(cv::GCall &call, int i) { return call.yieldOpaque(i); } + }; + } // anonymous namespace + + //////////////////////////////////////////////////////////////////////////// + // Helper classes which brings outputMeta() marshalling to kernel + // implementations + // + // 1. MetaType establishes G#Type -> G#Meta mapping between G-API dynamic + // types and its metadata descriptor types. + // This mapping is used to transform types to call outMeta() callback. + template struct MetaType; + template<> struct MetaType { using type = GMatDesc; }; + template<> struct MetaType { using type = GMatDesc; }; + template<> struct MetaType { using type = GMatDesc; }; + template<> struct MetaType { using type = GScalarDesc; }; + template struct MetaType > { using type = GArrayDesc; }; + template struct MetaType > { using type = GOpaqueDesc; }; + template struct MetaType { using type = T; }; // opaque args passed as-is + + // 2. Hacky test based on MetaType to check if we operate on G-* type or not + template using is_nongapi_type = std::is_same::type>; + + // 3. Two ways to transform input arguments to its meta - for G-* and non-G* types: + template + typename std::enable_if::value, typename MetaType::type> + ::type get_in_meta(const GMetaArgs &in_meta, const GArgs &, int idx) + { + return util::get::type>(in_meta.at(idx)); + } + + template + typename std::enable_if::value, T> + ::type get_in_meta(const GMetaArgs &, const GArgs &in_args, int idx) + { + return in_args.at(idx).template get(); + } + + // 4. The MetaHelper itself: an entity which generates outMeta() call + // based on kernel signature, with arguments properly substituted. + // 4.1 - case for multiple return values + // FIXME: probably can be simplified with std::apply or analogue. + template + struct MetaHelper; + + template + struct MetaHelper, std::tuple > + { + template + static GMetaArgs getOutMeta_impl(const GMetaArgs &in_meta, + const GArgs &in_args, + detail::Seq, + detail::Seq) + { + // FIXME: decay? + using R = std::tuple::type...>; + const R r = K::outMeta( get_in_meta(in_meta, in_args, IIs)... ); + return GMetaArgs{ GMetaArg(std::get(r))... }; + } + // FIXME: help users identify how outMeta must look like (via default impl w/static_assert?) + + static GMetaArgs getOutMeta(const GMetaArgs &in_meta, + const GArgs &in_args) + { + return getOutMeta_impl(in_meta, + in_args, + typename detail::MkSeq::type(), + typename detail::MkSeq::type()); + } + }; + + // 4.1 - case for a single return value + // FIXME: How to avoid duplication here? + template + struct MetaHelper, Out > + { + template + static GMetaArgs getOutMeta_impl(const GMetaArgs &in_meta, + const GArgs &in_args, + detail::Seq) + { + // FIXME: decay? + using R = typename MetaType::type; + const R r = K::outMeta( get_in_meta(in_meta, in_args, IIs)... ); + return GMetaArgs{ GMetaArg(r) }; + } + // FIXME: help users identify how outMeta must look like (via default impl w/static_assert?) + + static GMetaArgs getOutMeta(const GMetaArgs &in_meta, + const GArgs &in_args) + { + return getOutMeta_impl(in_meta, + in_args, + typename detail::MkSeq::type()); + } + }; + + //////////////////////////////////////////////////////////////////////////// + // Helper class to introduce tags to calls. By default there's no tag + struct NoTag { + static constexpr const char *tag() { return ""; } + }; + +} // namespace detail + +// GKernelType and GKernelTypeM are base classes which implement typed ::on() +// method based on kernel signature. GKernelTypeM stands for multiple-return-value kernels +// +// G_TYPED_KERNEL and G_TYPED_KERNEL_M macros inherit user classes from GKernelType and +// GKernelTypeM respectively. + +template +class GKernelTypeM(Args...)> > + : public detail::MetaHelper, std::tuple> + , public detail::NoTag +{ + template + static std::tuple yield(cv::GCall &call, detail::Seq) + { + return std::make_tuple(detail::Yield::yield(call, IIs)...); + } + +public: + using InArgs = std::tuple; + using OutArgs = std::tuple; + + static std::tuple on(Args... args) + { + cv::GCall call(GKernel{K::id(), K::tag(), &K::getOutMeta, {detail::GTypeTraits::shape...}}); + call.pass(args...); + return yield(call, typename detail::MkSeq::type()); + } +}; + +template class GKernelType; + +template +class GKernelType > + : public detail::MetaHelper, R> + , public detail::NoTag +{ +public: + using InArgs = std::tuple; + using OutArgs = std::tuple; + + static_assert(!cv::detail::contains::value, "Values of GFrame type can't be used as operation outputs"); + + static R on(Args... args) + { + cv::GCall call(GKernel{K::id(), K::tag(), &K::getOutMeta, {detail::GTypeTraits::shape}}); + call.pass(args...); + return detail::Yield::yield(call, 0); + } +}; + +namespace detail { +// This tiny class eliminates the semantic difference between +// GKernelType and GKernelTypeM. +template class KernelTypeMedium; + +template +class KernelTypeMedium(Args...)>> : + public cv::GKernelTypeM(Args...)>> {}; + +template +class KernelTypeMedium> : + public cv::GKernelType> {}; +} // namespace detail + +} // namespace cv + + +// FIXME: I don't know a better way so far. Feel free to suggest one +// The problem is that every typed kernel should have ::id() but body +// of the class is defined by user (with outMeta, other stuff) + +//! @cond IGNORED +#define G_ID_HELPER_CLASS(Class) Class##IdHelper + +#define G_ID_HELPER_BODY(Class, Id) \ + struct G_ID_HELPER_CLASS(Class) \ + { \ + static constexpr const char * id() {return Id;} \ + }; \ +//! @endcond + +#define GET_G_TYPED_KERNEL(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, NAME, ...) NAME +#define COMBINE_SIGNATURE(...) __VA_ARGS__ +// Ensure correct __VA_ARGS__ expansion on Windows +#define __WRAP_VAARGS(x) x + +/** + * Helper for G_TYPED_KERNEL declares a new G-API Operation. See [Kernel API](@ref gapi_kernel_api) + * for more details. + * + * @param Class type name for this operation. + * @param API an `std::function<>`-like signature for the operation; + * return type is a single value or a tuple of multiple values. + * @param Id string identifier for the operation. Must be unique. + */ +#define G_TYPED_KERNEL_HELPER(Class, API, Id) \ + G_ID_HELPER_BODY(Class, Id) \ + struct Class final: public cv::detail::KernelTypeMedium, \ + public G_ID_HELPER_CLASS(Class) +// {body} is to be defined by user + +#define G_TYPED_KERNEL_HELPER_2(Class, _1, _2, Id) \ +G_TYPED_KERNEL_HELPER(Class, COMBINE_SIGNATURE(_1, _2), Id) + +#define G_TYPED_KERNEL_HELPER_3(Class, _1, _2, _3, Id) \ +G_TYPED_KERNEL_HELPER(Class, COMBINE_SIGNATURE(_1, _2, _3), Id) + +#define G_TYPED_KERNEL_HELPER_4(Class, _1, _2, _3, _4, Id) \ +G_TYPED_KERNEL_HELPER(Class, COMBINE_SIGNATURE(_1, _2, _3, _4), Id) + +#define G_TYPED_KERNEL_HELPER_5(Class, _1, _2, _3, _4, _5, Id) \ +G_TYPED_KERNEL_HELPER(Class, COMBINE_SIGNATURE(_1, _2, _3, _4, _5), Id) + +#define G_TYPED_KERNEL_HELPER_6(Class, _1, _2, _3, _4, _5, _6, Id) \ +G_TYPED_KERNEL_HELPER(Class, COMBINE_SIGNATURE(_1, _2, _3, _4, _5, _6), Id) + +#define G_TYPED_KERNEL_HELPER_7(Class, _1, _2, _3, _4, _5, _6, _7, Id) \ +G_TYPED_KERNEL_HELPER(Class, COMBINE_SIGNATURE(_1, _2, _3, _4, _5, _6, _7), Id) + +#define G_TYPED_KERNEL_HELPER_8(Class, _1, _2, _3, _4, _5, _6, _7, _8, Id) \ +G_TYPED_KERNEL_HELPER(Class, COMBINE_SIGNATURE(_1, _2, _3, _4, _5, _6, _7, _8), Id) + +#define G_TYPED_KERNEL_HELPER_9(Class, _1, _2, _3, _4, _5, _6, _7, _8, _9, Id) \ +G_TYPED_KERNEL_HELPER(Class, COMBINE_SIGNATURE(_1, _2, _3, _4, _5, _6, _7, _8, _9), Id) + +#define G_TYPED_KERNEL_HELPER_10(Class, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, Id) \ +G_TYPED_KERNEL_HELPER(Class, COMBINE_SIGNATURE(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10), Id) + +/** + * Declares a new G-API Operation. See [Kernel API](@ref gapi_kernel_api) + * for more details. + * + * @param Class type name for this operation. + */ +#define G_TYPED_KERNEL(Class, ...) __WRAP_VAARGS(GET_G_TYPED_KERNEL(__VA_ARGS__, \ + G_TYPED_KERNEL_HELPER_10, \ + G_TYPED_KERNEL_HELPER_9, \ + G_TYPED_KERNEL_HELPER_8, \ + G_TYPED_KERNEL_HELPER_7, \ + G_TYPED_KERNEL_HELPER_6, \ + G_TYPED_KERNEL_HELPER_5, \ + G_TYPED_KERNEL_HELPER_4, \ + G_TYPED_KERNEL_HELPER_3, \ + G_TYPED_KERNEL_HELPER_2, \ + G_TYPED_KERNEL_HELPER)(Class, __VA_ARGS__)) \ + +/** + * Declares a new G-API Operation. See [Kernel API](@ref gapi_kernel_api) for more details. + * + * @deprecated This macro is deprecated in favor of `G_TYPED_KERNEL` that is used for declaring any + * G-API Operation. + * + * @param Class type name for this operation. + */ +#define G_TYPED_KERNEL_M G_TYPED_KERNEL + +#define G_API_OP G_TYPED_KERNEL +#define G_API_OP_M G_API_OP + +namespace cv +{ +namespace gapi +{ + // Prework: model "Device" API before it gets to G-API headers. + // FIXME: Don't mix with internal Backends class! + class GAPI_EXPORTS GBackend + { + public: + class Priv; + + // TODO: make it template (call `new` within??) + GBackend(); + explicit GBackend(std::shared_ptr &&p); + + Priv& priv(); + const Priv& priv() const; + std::size_t hash() const; + + bool operator== (const GBackend &rhs) const; + + private: + std::shared_ptr m_priv; + }; + + inline bool operator != (const GBackend &lhs, const GBackend &rhs) + { + return !(lhs == rhs); + } +} // namespace gapi +} // namespace cv + +namespace std +{ + template<> struct hash + { + std::size_t operator() (const cv::gapi::GBackend &b) const + { + return b.hash(); + } + }; +} // namespace std + + +namespace cv { +namespace gapi { + class GFunctor + { + public: + virtual cv::GKernelImpl impl() const = 0; + virtual cv::gapi::GBackend backend() const = 0; + const char* id() const { return m_id; } + + virtual ~GFunctor() = default; + protected: + GFunctor(const char* id) : m_id(id) { }; + private: + const char* m_id; + }; + + /** \addtogroup gapi_compile_args + * @{ + */ + + // FIXME: Hide implementation + /** + * @brief A container class for heterogeneous kernel + * implementation collections and graph transformations. + * + * GKernelPackage is a special container class which stores kernel + * _implementations_ and graph _transformations_. Objects of this class + * are created and passed to cv::GComputation::compile() to specify + * which kernels to use and which transformations to apply in the + * compiled graph. GKernelPackage may contain kernels of + * different backends, e.g. be heterogeneous. + * + * The most easy way to create a kernel package is to use function + * cv::gapi::kernels(). This template functions takes kernel + * implementations in form of type list (variadic template) and + * generates a kernel package atop of that. + * + * Kernel packages can be also generated programmatically, starting + * with an empty package (created with the default constructor) + * and then by populating it with kernels via call to + * GKernelPackage::include(). Note this method is also a template + * one since G-API kernel and transformation implementations are _types_, + * not objects. + * + * Finally, two kernel packages can be combined into a new one + * with function cv::gapi::combine(). + */ + class GAPI_EXPORTS GKernelPackage + { + + /// @private + using M = std::unordered_map>; + + /// @private + M m_id_kernels; + + /// @private + std::vector m_transformations; + + protected: + /// @private + // Check if package contains ANY implementation of a kernel API + // by API textual id. + bool includesAPI(const std::string &id) const; + + /// @private + // Remove ALL implementations of the given API (identified by ID) + void removeAPI(const std::string &id); + + /// @private + // Partial include() specialization for kernels + template + typename std::enable_if<(std::is_base_of::value), void>::type + includeHelper() + { + auto backend = KImpl::backend(); + auto kernel_id = KImpl::API::id(); + auto kernel_impl = GKernelImpl{KImpl::kernel()}; + removeAPI(kernel_id); + + m_id_kernels[kernel_id] = std::make_pair(backend, kernel_impl); + } + + /// @private + // Partial include() specialization for transformations + template + typename std::enable_if<(std::is_base_of::value), void>::type + includeHelper() + { + m_transformations.emplace_back(TImpl::transformation()); + } + + public: + void include(const GFunctor& functor) + { + m_id_kernels[functor.id()] = std::make_pair(functor.backend(), functor.impl()); + } + /** + * @brief Returns total number of kernels + * in the package (across all backends included) + * + * @return a number of kernels in the package + */ + std::size_t size() const; + + /** + * @brief Returns vector of transformations included in the package + * + * @return vector of transformations included in the package + */ + const std::vector& get_transformations() const; + + /** + * @brief Test if a particular kernel _implementation_ KImpl is + * included in this kernel package. + * + * @sa includesAPI() + * + * @note cannot be applied to transformations + * + * @return true if there is such kernel, false otherwise. + */ + template + bool includes() const + { + static_assert(std::is_base_of::value, + "includes() can be applied to kernels only"); + + auto kernel_it = m_id_kernels.find(KImpl::API::id()); + return kernel_it != m_id_kernels.end() && + kernel_it->second.first == KImpl::backend(); + } + + /** + * @brief Remove all kernels associated with the given backend + * from the package. + * + * Does nothing if there's no kernels of this backend in the package. + * + * @param backend backend which kernels to remove + */ + void remove(const GBackend& backend); + + /** + * @brief Remove all kernels implementing the given API from + * the package. + * + * Does nothing if there's no kernels implementing the given interface. + */ + template + void remove() + { + removeAPI(KAPI::id()); + } + + // FIXME: Rename to includes() and distinguish API/impl case by + // statically? + /** + * Check if package contains ANY implementation of a kernel API + * by API type. + */ + template + bool includesAPI() const + { + return includesAPI(KAPI::id()); + } + + // FIXME: The below comment is wrong, and who needs this function? + /** + * @brief Find a kernel (by its API) + * + * Returns implementation corresponding id. + * Throws if nothing found. + * + * @return Backend which hosts matching kernel implementation. + * + */ + template + GBackend lookup() const + { + return lookup(KAPI::id()).first; + } + + /// @private + std::pair + lookup(const std::string &id) const; + + // FIXME: No overwrites allowed? + /** + * @brief Put a new kernel implementation or a new transformation + * KImpl into the package. + */ + template + void include() + { + includeHelper(); + } + + /** + * @brief Lists all backends which are included into package + * + * @return vector of backends + */ + std::vector backends() const; + + // TODO: Doxygen bug -- it wants me to place this comment + // here, not below. + /** + * @brief Create a new package based on `lhs` and `rhs`. + * + * @param lhs "Left-hand-side" package in the process + * @param rhs "Right-hand-side" package in the process + * @return a new kernel package. + */ + friend GAPI_EXPORTS GKernelPackage combine(const GKernelPackage &lhs, + const GKernelPackage &rhs); + }; + + /** + * @brief Create a kernel package object containing kernels + * and transformations specified in variadic template argument. + * + * In G-API, kernel implementations and transformations are _types_. + * Every backend has its own kernel API (like GAPI_OCV_KERNEL() and + * GAPI_FLUID_KERNEL()) but all of that APIs define a new type for + * each kernel implementation. + * + * Use this function to pass kernel implementations (defined in + * either way) and transformations to the system. Example: + * + * @snippet modules/gapi/samples/api_ref_snippets.cpp kernels_snippet + * + * Note that kernels() itself is a function returning object, not + * a type, so having `()` at the end is important -- it must be a + * function call. + */ + template GKernelPackage kernels() + { + // FIXME: currently there is no check that transformations' signatures are unique + // and won't be any intersection in graph compilation stage + static_assert(detail::all_unique::value, "Kernels API must be unique"); + + GKernelPackage pkg; + + // For those who wonder - below is a trick to call a number of + // methods based on parameter pack (zeroes just help hiding these + // calls into a sequence which helps to expand this parameter pack). + // Just note that `f(),a` always equals to `a` (with f() called!) + // and parentheses are used to hide function call in the expanded sequence. + // Leading 0 helps to handle case when KK is an empty list (kernels<>()). + int unused[] = { 0, (pkg.include(), 0)... }; + cv::util::suppress_unused_warning(unused); + return pkg; + }; + + template + GKernelPackage kernels(FF&... functors) + { + GKernelPackage pkg; + int unused[] = { 0, (pkg.include(functors), 0)... }; + cv::util::suppress_unused_warning(unused); + return pkg; + }; + + /** @} */ + + // FYI - this function is already commented above + GAPI_EXPORTS GKernelPackage combine(const GKernelPackage &lhs, + const GKernelPackage &rhs); + + /** + * @brief Combines multiple G-API kernel packages into one + * + * @overload + * + * This function successively combines the passed kernel packages using a right fold. + * Calling `combine(a, b, c)` is equal to `combine(a, combine(b, c))`. + * + * @return The resulting kernel package + */ + template + GKernelPackage combine(const GKernelPackage &a, const GKernelPackage &b, Ps&&... rest) + { + return combine(a, combine(b, rest...)); + } + + /** \addtogroup gapi_compile_args + * @{ + */ + /** + * @brief cv::use_only() is a special combinator which hints G-API to use only + * kernels specified in cv::GComputation::compile() (and not to extend kernels available by + * default with that package). + */ + struct GAPI_EXPORTS use_only + { + GKernelPackage pkg; + }; + /** @} */ + +} // namespace gapi + +namespace detail +{ + template<> struct CompileArgTag + { + static const char* tag() { return "gapi.kernel_package"; } + }; + + template<> struct CompileArgTag + { + static const char* tag() { return "gapi.use_only"; } + }; +} // namespace detail +} // namespace cv + +#endif // OPENCV_GAPI_GKERNEL_HPP diff --git a/IPL/include/opencv/opencv2/gapi/gmat.hpp b/IPL/include/opencv/opencv2/gapi/gmat.hpp new file mode 100644 index 0000000..eed9364 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/gmat.hpp @@ -0,0 +1,232 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_GMAT_HPP +#define OPENCV_GAPI_GMAT_HPP + +#include +#include // std::shared_ptr + +#include +#include // GShape + +#include + +// TODO GAPI_EXPORTS or so +namespace cv +{ +// Forward declaration; GNode and GOrigin are an internal +// (user-inaccessible) classes. +class GNode; +struct GOrigin; + +/** \addtogroup gapi_data_objects + * @{ + * + * @brief G-API data objects used to build G-API expressions. + * + * These objects do not own any particular data (except compile-time + * associated values like with cv::GScalar) and are used to construct + * graphs. + * + * Every graph in G-API starts and ends with data objects. + * + * Once constructed and compiled, G-API operates with regular host-side + * data instead. Refer to the below table to find the mapping between + * G-API and regular data types. + * + * G-API data type | I/O data type + * ------------------ | ------------- + * cv::GMat | cv::Mat + * cv::GScalar | cv::Scalar + * `cv::GArray` | std::vector + * `cv::GOpaque` | T + */ +class GAPI_EXPORTS GMat +{ +public: + GMat(); // Empty constructor + GMat(const GNode &n, std::size_t out); // Operation result constructor + + GOrigin& priv(); // Internal use only + const GOrigin& priv() const; // Internal use only + +private: + std::shared_ptr m_priv; +}; + +class GAPI_EXPORTS GMatP : public GMat +{ +public: + using GMat::GMat; +}; + +class GAPI_EXPORTS GFrame : public GMat +{ +public: + using GMat::GMat; +}; + +namespace gapi { namespace own { + class Mat; +}}//gapi::own + +/** @} */ + +/** + * \addtogroup gapi_meta_args + * @{ + */ +struct GAPI_EXPORTS GMatDesc +{ + // FIXME: Default initializers in C++14 + int depth; + int chan; + cv::Size size; // NB.: no multi-dimensional cases covered yet + bool planar; + std::vector dims; // FIXME: Maybe it's real questionable to have it here + + GMatDesc(int d, int c, cv::Size s, bool p = false) + : depth(d), chan(c), size(s), planar(p) {} + + GMatDesc(int d, const std::vector &dd) + : depth(d), chan(-1), size{-1,-1}, planar(false), dims(dd) {} + + GMatDesc(int d, std::vector &&dd) + : depth(d), chan(-1), size{-1,-1}, planar(false), dims(std::move(dd)) {} + + GMatDesc() : GMatDesc(-1, -1, {-1,-1}) {} + + inline bool operator== (const GMatDesc &rhs) const + { + return depth == rhs.depth + && chan == rhs.chan + && size == rhs.size + && planar == rhs.planar + && dims == rhs.dims; + } + + inline bool operator!= (const GMatDesc &rhs) const + { + return !(*this == rhs); + } + + bool isND() const { return !dims.empty(); } + + // Checks if the passed mat can be described by this descriptor + // (it handles the case when + // 1-channel mat can be reinterpreted as is (1-channel mat) + // and as a 3-channel planar mat with height divided by 3) + bool canDescribe(const cv::gapi::own::Mat& mat) const; + + // Meta combinator: return a new GMatDesc which differs in size by delta + // (all other fields are taken unchanged from this GMatDesc) + // FIXME: a better name? + GMatDesc withSizeDelta(cv::Size delta) const + { + GMatDesc desc(*this); + desc.size += delta; + return desc; + } +#if !defined(GAPI_STANDALONE) + bool canDescribe(const cv::Mat& mat) const; +#endif // !defined(GAPI_STANDALONE) + // Meta combinator: return a new GMatDesc which differs in size by delta + // (all other fields are taken unchanged from this GMatDesc) + // + // This is an overload. + GMatDesc withSizeDelta(int dx, int dy) const + { + return withSizeDelta(cv::Size{dx,dy}); + } + + GMatDesc withSize(cv::Size sz) const + { + GMatDesc desc(*this); + desc.size = sz; + return desc; + } + + // Meta combinator: return a new GMatDesc with specified data depth. + // (all other fields are taken unchanged from this GMatDesc) + GMatDesc withDepth(int ddepth) const + { + GAPI_Assert(CV_MAT_CN(ddepth) == 1 || ddepth == -1); + GMatDesc desc(*this); + if (ddepth != -1) desc.depth = ddepth; + return desc; + } + + // Meta combinator: return a new GMatDesc with specified data depth + // and number of channels. + // (all other fields are taken unchanged from this GMatDesc) + GMatDesc withType(int ddepth, int dchan) const + { + GAPI_Assert(CV_MAT_CN(ddepth) == 1 || ddepth == -1); + GMatDesc desc = withDepth(ddepth); + desc.chan = dchan; + return desc; + } + + // Meta combinator: return a new GMatDesc with planar flag set + // (no size changes are performed, only channel interpretation is changed + // (interleaved -> planar) + GMatDesc asPlanar() const + { + GAPI_Assert(planar == false); + GMatDesc desc(*this); + desc.planar = true; + return desc; + } + + // Meta combinator: return a new GMatDesc + // reinterpreting 1-channel input as planar image + // (size height is divided by plane number) + GMatDesc asPlanar(int planes) const + { + GAPI_Assert(planar == false); + GAPI_Assert(chan == 1); + GAPI_Assert(planes > 1); + GAPI_Assert(size.height % planes == 0); + GMatDesc desc(*this); + desc.size.height /= planes; + desc.chan = planes; + return desc.asPlanar(); + } + + // Meta combinator: return a new GMatDesc with planar flag set to false + // (no size changes are performed, only channel interpretation is changed + // (planar -> interleaved) + GMatDesc asInterleaved() const + { + GAPI_Assert(planar == true); + GMatDesc desc(*this); + desc.planar = false; + return desc; + } +}; + +static inline GMatDesc empty_gmat_desc() { return GMatDesc{-1,-1,{-1,-1}}; } + +#if !defined(GAPI_STANDALONE) +class Mat; +GAPI_EXPORTS GMatDesc descr_of(const cv::Mat &mat); +GAPI_EXPORTS GMatDesc descr_of(const cv::UMat &mat); +#endif // !defined(GAPI_STANDALONE) + +/** @} */ + +// FIXME: WHY??? WHY it is under different namespace? +namespace gapi { namespace own { + GAPI_EXPORTS GMatDesc descr_of(const Mat &mat); +}}//gapi::own + +GAPI_EXPORTS std::ostream& operator<<(std::ostream& os, const cv::GMatDesc &desc); + +} // namespace cv + +#endif // OPENCV_GAPI_GMAT_HPP diff --git a/IPL/include/opencv/opencv2/gapi/gmetaarg.hpp b/IPL/include/opencv/opencv2/gapi/gmetaarg.hpp new file mode 100644 index 0000000..39f087f --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/gmetaarg.hpp @@ -0,0 +1,80 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_GMETAARG_HPP +#define OPENCV_GAPI_GMETAARG_HPP + +#include +#include + +#include +#include + +#include +#include +#include +#include + +namespace cv +{ +// FIXME: Rename to GMeta? +// FIXME: user shouldn't deal with it - put to detail? +// GMetaArg is an union type over descriptions of G-types which can serve as +// GComputation's in/output slots. +// +// GMetaArg objects are passed as arguments to GComputation::compile() +// to specify which data a compiled computation should be specialized on. +// For manual compile(), user must supply this metadata, in case of apply() +// this metadata is taken from arguments computation should operate on. +// +// The first type (monostate) is equal to "uninitialized"/"unresolved" meta. +using GMetaArg = util::variant + < util::monostate + , GMatDesc + , GScalarDesc + , GArrayDesc + , GOpaqueDesc + >; +GAPI_EXPORTS std::ostream& operator<<(std::ostream& os, const GMetaArg &); + +using GMetaArgs = std::vector; + +namespace detail +{ + // These traits are used by GComputation::compile() + + // FIXME: is_constructible doesn't work as variant doesn't do any SFINAE + // in its current template constructor + + template struct is_meta_descr : std::false_type {}; + template<> struct is_meta_descr : std::true_type {}; + template<> struct is_meta_descr : std::true_type {}; + template<> struct is_meta_descr : std::true_type {}; + template<> struct is_meta_descr : std::true_type {}; + + template + using are_meta_descrs = all_satisfy; + + template + using are_meta_descrs_but_last = all_satisfy::type>; + +} // namespace detail + +// Note: descr_of(std::vector<..>) returns a GArrayDesc, while +// descrs_of(std::vector<..>) returns an array of Meta args! +class Mat; +class UMat; +GAPI_EXPORTS cv::GMetaArgs descrs_of(const std::vector &vec); +GAPI_EXPORTS cv::GMetaArgs descrs_of(const std::vector &vec); +namespace gapi { namespace own { + class Mat; + GAPI_EXPORTS cv::GMetaArgs descrs_of(const std::vector &vec); +}} // namespace gapi::own + +} // namespace cv + +#endif // OPENCV_GAPI_GMETAARG_HPP diff --git a/IPL/include/opencv/opencv2/gapi/gopaque.hpp b/IPL/include/opencv/opencv2/gapi/gopaque.hpp new file mode 100644 index 0000000..f5d06bb --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/gopaque.hpp @@ -0,0 +1,294 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2019 Intel Corporation + + +#ifndef OPENCV_GAPI_GOPAQUE_HPP +#define OPENCV_GAPI_GOPAQUE_HPP + +#include +#include +#include + +#include +#include + +#include +#include +#include + +namespace cv +{ +// Forward declaration; GNode and GOrigin are an internal +// (user-inaccessible) classes. +class GNode; +struct GOrigin; +template class GOpaque; + +/** + * \addtogroup gapi_meta_args + * @{ + */ +struct GOpaqueDesc +{ + // FIXME: Body + // FIXME: Also implement proper operator== then + bool operator== (const GOpaqueDesc&) const { return true; } +}; +template GOpaqueDesc descr_of(const U &) { return {};} +static inline GOpaqueDesc empty_gopaque_desc() {return {}; } +/** @} */ + +std::ostream& operator<<(std::ostream& os, const cv::GOpaqueDesc &desc); + +namespace detail +{ + // ConstructOpaque is a callback which stores information about T and is used by + // G-API runtime to construct an object in host memory (T remains opaque for G-API). + // ConstructOpaque is carried into G-API internals by GOpaqueU. + // Currently it is suitable for Host (CPU) plugins only, real offload may require + // more information for manual memory allocation on-device. + class OpaqueRef; + using ConstructOpaque = std::function; + + // FIXME: garray.hpp already contains hint classes (for actual T type verification), + // need to think where it can be moved (currently opaque uses it from garray) + + // This class strips type information from GOpaque and makes it usable + // in the G-API graph compiler (expression unrolling, graph generation, etc). + // Part of GProtoArg. + class GAPI_EXPORTS GOpaqueU + { + public: + GOpaqueU(const GNode &n, std::size_t out); // Operation result constructor + + template + bool holds() const; // Check if was created from GOpaque + + GOrigin& priv(); // Internal use only + const GOrigin& priv() const; // Internal use only + + protected: + GOpaqueU(); // Default constructor + template friend class cv::GOpaque; // (available for GOpaque only) + + void setConstructFcn(ConstructOpaque &&cv); // Store T-aware constructor + + template + void specifyType(); // Store type of initial GOpaque + + std::shared_ptr m_priv; + std::shared_ptr m_hint; + }; + + template + bool GOpaqueU::holds() const{ + GAPI_Assert(m_hint != nullptr); + using U = typename std::decay::type; + return dynamic_cast*>(m_hint.get()) != nullptr; + }; + + template + void GOpaqueU::specifyType(){ + m_hint.reset(new TypeHint::type>); + }; + + // This class represents a typed object reference. + // Depending on origins, this reference may be either "just a" reference to + // an object created externally, OR actually own the underlying object + // (be value holder). + class BasicOpaqueRef + { + public: + cv::GOpaqueDesc m_desc; + virtual ~BasicOpaqueRef() {} + + virtual void mov(BasicOpaqueRef &ref) = 0; + virtual const void* ptr() const = 0; + }; + + template class OpaqueRefT final: public BasicOpaqueRef + { + using empty_t = util::monostate; + using ro_ext_t = const T *; + using rw_ext_t = T *; + using rw_own_t = T ; + util::variant m_ref; + + inline bool isEmpty() const { return util::holds_alternative(m_ref); } + inline bool isROExt() const { return util::holds_alternative(m_ref); } + inline bool isRWExt() const { return util::holds_alternative(m_ref); } + inline bool isRWOwn() const { return util::holds_alternative(m_ref); } + + void init(const T* obj = nullptr) + { + if (obj) m_desc = cv::descr_of(*obj); + } + + public: + OpaqueRefT() { init(); } + virtual ~OpaqueRefT() {} + + explicit OpaqueRefT(const T& obj) : m_ref(&obj) { init(&obj); } + explicit OpaqueRefT( T& obj) : m_ref(&obj) { init(&obj); } + explicit OpaqueRefT( T&& obj) : m_ref(std::move(obj)) { init(&obj); } + + // Reset a OpaqueRefT. Called only for objects instantiated + // internally in G-API (e.g. temporary GOpaque's within a + // computation). Reset here means both initialization + // (creating an object) and reset (discarding its existing + // content before the next execution). Must never be called + // for external OpaqueRefTs. + void reset() + { + if (isEmpty()) + { + T empty_obj{}; + m_desc = cv::descr_of(empty_obj); + m_ref = std::move(empty_obj); + GAPI_Assert(isRWOwn()); + } + else if (isRWOwn()) + { + util::get(m_ref) = {}; + } + else GAPI_Assert(false); // shouldn't be called in *EXT modes + } + + // Obtain a WRITE reference to underlying object + // Used by CPU kernel API wrappers when a kernel execution frame + // is created + T& wref() + { + GAPI_Assert(isRWExt() || isRWOwn()); + if (isRWExt()) return *util::get(m_ref); + if (isRWOwn()) return util::get(m_ref); + util::throw_error(std::logic_error("Impossible happened")); + } + + // Obtain a READ reference to underlying object + // Used by CPU kernel API wrappers when a kernel execution frame + // is created + const T& rref() const + { + // ANY object can be accessed for reading, even if it declared for + // output. Example -- a GComputation from [in] to [out1,out2] + // where [out2] is a result of operation applied to [out1]: + // + // GComputation boundary + // . . . . . . . + // . . + // [in] ----> foo() ----> [out1] + // . . : + // . . . .:. . . + // . V . + // . bar() ---> [out2] + // . . . . . . . . . . . . + // + if (isROExt()) return *util::get(m_ref); + if (isRWExt()) return *util::get(m_ref); + if (isRWOwn()) return util::get(m_ref); + util::throw_error(std::logic_error("Impossible happened")); + } + + virtual void mov(BasicOpaqueRef &v) override { + OpaqueRefT *tv = dynamic_cast*>(&v); + GAPI_Assert(tv != nullptr); + wref() = std::move(tv->wref()); + } + + virtual const void* ptr() const override { return &rref(); } + }; + + // This class strips type information from OpaqueRefT<> and makes it usable + // in the G-API executables (carrying run-time data/information to kernels). + // Part of GRunArg. + // Its methods are typed proxies to OpaqueRefT. + // OpaqueRef maintains "reference" semantics so two copies of OpaqueRef refer + // to the same underlying object. + class OpaqueRef + { + std::shared_ptr m_ref; + + template inline void check() const + { + GAPI_DbgAssert(dynamic_cast*>(m_ref.get()) != nullptr); + } + + public: + OpaqueRef() = default; + + template explicit OpaqueRef(T&& obj) : + m_ref(new OpaqueRefT::type>(std::forward(obj))) {} + + template void reset() + { + if (!m_ref) m_ref.reset(new OpaqueRefT()); + + check(); + static_cast&>(*m_ref).reset(); + } + + template T& wref() + { + check(); + return static_cast&>(*m_ref).wref(); + } + + template const T& rref() const + { + check(); + return static_cast&>(*m_ref).rref(); + } + + void mov(OpaqueRef &v) + { + m_ref->mov(*v.m_ref); + } + + cv::GOpaqueDesc descr_of() const + { + return m_ref->m_desc; + } + + // May be used to uniquely identify this object internally + const void *ptr() const { return m_ref->ptr(); } + }; +} // namespace detail + +/** \addtogroup gapi_data_objects + * @{ + */ + +template class GOpaque +{ +public: + GOpaque() { putDetails(); } // Empty constructor + explicit GOpaque(detail::GOpaqueU &&ref) // GOpaqueU-based constructor + : m_ref(ref) { putDetails(); } // (used by GCall, not for users) + + detail::GOpaqueU strip() const { return m_ref; } + +private: + // Host type (or Flat type) - the type this GOpaque is actually + // specified to. + using HT = typename detail::flatten_g::type>::type; + + static void CTor(detail::OpaqueRef& ref) { + ref.reset(); + } + void putDetails() { + m_ref.setConstructFcn(&CTor); + m_ref.specifyType(); + } + + detail::GOpaqueU m_ref; +}; + +/** @} */ + +} // namespace cv + +#endif // OPENCV_GAPI_GOPAQUE_HPP diff --git a/IPL/include/opencv/opencv2/gapi/gproto.hpp b/IPL/include/opencv/opencv2/gapi/gproto.hpp new file mode 100644 index 0000000..8858199 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/gproto.hpp @@ -0,0 +1,136 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_GPROTO_HPP +#define OPENCV_GAPI_GPROTO_HPP + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +namespace cv { + +// FIXME: user shouldn't deal with it - put to detail? +// GProtoArg is an union type over G-types which can serve as +// GComputation's in/output slots. In other words, GProtoArg +// wraps any type which can serve as G-API exchange type. +// +// In Runtime, GProtoArgs are substituted with appropriate GRunArgs. +// +// GProtoArg objects are constructed in-place when user describes +// (captures) computations, user doesn't interact with these types +// directly. +using GProtoArg = util::variant + < GMat + , GMatP + , GFrame + , GScalar + , detail::GArrayU // instead of GArray + , detail::GOpaqueU // instead of GOpaque + >; + +using GProtoArgs = std::vector; + +namespace detail +{ +template inline GProtoArgs packArgs(Ts... args) +{ + return GProtoArgs{ GProtoArg(wrap_gapi_helper::wrap(args))... }; +} + +} + +template +struct GIOProtoArgs +{ +public: + explicit GIOProtoArgs(const GProtoArgs& args) : m_args(args) {} + explicit GIOProtoArgs(GProtoArgs &&args) : m_args(std::move(args)) {} + + GProtoArgs m_args; +}; + +struct In_Tag{}; +struct Out_Tag{}; + +using GProtoInputArgs = GIOProtoArgs; +using GProtoOutputArgs = GIOProtoArgs; + +// Perfect forwarding +template inline GProtoInputArgs GIn(Ts&&... ts) +{ + return GProtoInputArgs(detail::packArgs(std::forward(ts)...)); +} + +template inline GProtoOutputArgs GOut(Ts&&... ts) +{ + return GProtoOutputArgs(detail::packArgs(std::forward(ts)...)); +} + +namespace detail +{ + // Extract elements form tuple + // FIXME: Someday utilize a generic tuple_to_vec<> routine + template + static GProtoOutputArgs getGOut_impl(const std::tuple& ts, detail::Seq) + { + return GProtoOutputArgs{ detail::packArgs(std::get(ts)...)}; + } +} + +template inline GProtoOutputArgs GOut(const std::tuple& ts) +{ + // TODO: think of std::forward(ts) + return detail::getGOut_impl(ts, typename detail::MkSeq::type()); +} + +// Takes rvalue as input arg +template inline GProtoOutputArgs GOut(std::tuple&& ts) +{ + // TODO: think of std::forward(ts) + return detail::getGOut_impl(ts, typename detail::MkSeq::type()); +} + +// Extract run-time arguments from node origin +// Can be used to extract constant values associated with G-objects +// (like GScalar) at graph construction time +GRunArg value_of(const GOrigin &origin); + +// Transform run-time computation arguments into a collection of metadata +// extracted from that arguments +GMetaArg GAPI_EXPORTS descr_of(const GRunArg &arg ); +GMetaArgs GAPI_EXPORTS descr_of(const GRunArgs &args); + +// Transform run-time operation result argument into metadata extracted from that argument +// Used to compare the metadata, which generated at compile time with the metadata result operation in run time +GMetaArg GAPI_EXPORTS descr_of(const GRunArgP& argp); + +// Checks if run-time computation argument can be described by metadata +bool GAPI_EXPORTS can_describe(const GMetaArg& meta, const GRunArg& arg); +bool GAPI_EXPORTS can_describe(const GMetaArgs& metas, const GRunArgs& args); + +// Checks if run-time computation result argument can be described by metadata. +// Used to check if the metadata generated at compile time +// coincides with output arguments passed to computation in cpu and ocl backends +bool GAPI_EXPORTS can_describe(const GMetaArg& meta, const GRunArgP& argp); + +// Validates input arguments +void GAPI_EXPORTS validate_input_arg(const GRunArg& arg); +void GAPI_EXPORTS validate_input_args(const GRunArgs& args); + +} // namespace cv + +#endif // OPENCV_GAPI_GPROTO_HPP diff --git a/IPL/include/opencv/opencv2/gapi/gpu/core.hpp b/IPL/include/opencv/opencv2/gapi/gpu/core.hpp new file mode 100644 index 0000000..a7ee595 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/gpu/core.hpp @@ -0,0 +1,27 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_GPU_CORE_API_HPP +#define OPENCV_GAPI_GPU_CORE_API_HPP +/** @file +* @deprecated Use instead. +*/ + +#include + +namespace cv { +namespace gapi { +namespace core { +namespace gpu { + using namespace ocl; +} // namespace gpu +} // namespace core +} // namespace gapi +} // namespace cv + + +#endif // OPENCV_GAPI_GPU_CORE_API_HPP diff --git a/IPL/include/opencv/opencv2/gapi/gpu/ggpukernel.hpp b/IPL/include/opencv/opencv2/gapi/gpu/ggpukernel.hpp new file mode 100644 index 0000000..b52c21d --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/gpu/ggpukernel.hpp @@ -0,0 +1,18 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_GGPUKERNEL_HPP +#define OPENCV_GAPI_GGPUKERNEL_HPP +/** @file +* @deprecated Use instead. +*/ + +#include +#define GAPI_GPU_KERNEL GAPI_OCL_KERNEL + + +#endif // OPENCV_GAPI_GGPUKERNEL_HPP diff --git a/IPL/include/opencv/opencv2/gapi/gpu/imgproc.hpp b/IPL/include/opencv/opencv2/gapi/gpu/imgproc.hpp new file mode 100644 index 0000000..b0df7ae --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/gpu/imgproc.hpp @@ -0,0 +1,28 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_GPU_IMGPROC_API_HPP +#define OPENCV_GAPI_GPU_IMGPROC_API_HPP +/** @file +* @deprecated Use instead. +*/ + +#include + + +namespace cv { +namespace gapi { +namespace imgproc { +namespace gpu { + using namespace ocl; +} // namespace gpu +} // namespace imgproc +} // namespace gapi +} // namespace cv + + +#endif // OPENCV_GAPI_GPU_IMGPROC_API_HPP diff --git a/IPL/include/opencv/opencv2/gapi/gscalar.hpp b/IPL/include/opencv/opencv2/gapi/gscalar.hpp new file mode 100644 index 0000000..be20048 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/gscalar.hpp @@ -0,0 +1,75 @@ +// This file is part of OpenCV project. + +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_GSCALAR_HPP +#define OPENCV_GAPI_GSCALAR_HPP + +#include + +#include +#include // GShape +#include + +namespace cv +{ +// Forward declaration; GNode and GOrigin are an internal +// (user-inaccessible) classes. +class GNode; +struct GOrigin; + +/** \addtogroup gapi_data_objects + * @{ + */ + +class GAPI_EXPORTS GScalar +{ +public: + GScalar(); // Empty constructor + explicit GScalar(const cv::Scalar& s); // Constant value constructor from cv::Scalar + explicit GScalar(cv::Scalar&& s); // Constant value move-constructor from cv::Scalar + + GScalar(double v0); // Constant value constructor from double + GScalar(const GNode &n, std::size_t out); // Operation result constructor + + GOrigin& priv(); // Internal use only + const GOrigin& priv() const; // Internal use only + +private: + std::shared_ptr m_priv; +}; + +/** @} */ + +/** + * \addtogroup gapi_meta_args + * @{ + */ +struct GScalarDesc +{ + // NB.: right now it is empty + + inline bool operator== (const GScalarDesc &) const + { + return true; // NB: implement this method if GScalar meta appears + } + + inline bool operator!= (const GScalarDesc &rhs) const + { + return !(*this == rhs); + } +}; + +static inline GScalarDesc empty_scalar_desc() { return GScalarDesc(); } + +GAPI_EXPORTS GScalarDesc descr_of(const cv::Scalar &scalar); + +std::ostream& operator<<(std::ostream& os, const cv::GScalarDesc &desc); + +} // namespace cv + +#endif // OPENCV_GAPI_GSCALAR_HPP diff --git a/IPL/include/opencv/opencv2/gapi/gstreaming.hpp b/IPL/include/opencv/opencv2/gapi/gstreaming.hpp new file mode 100644 index 0000000..7079042 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/gstreaming.hpp @@ -0,0 +1,231 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_GSTREAMING_COMPILED_HPP +#define OPENCV_GAPI_GSTREAMING_COMPILED_HPP + +#include + +#include +#include +#include +#include + +namespace cv { + +/** + * \addtogroup gapi_main_classes + * @{ + */ +/** + * @brief Represents a computation (graph) compiled for streaming. + * + * This class represents a product of graph compilation (calling + * cv::GComputation::compileStreaming()). Objects of this class + * actually do stream processing, and the whole pipeline execution + * complexity is incapsulated into objects of this class. Execution + * model has two levels: at the very top, the execution of a + * heterogeneous graph is aggressively pipelined; at the very bottom + * the execution of every internal block is determined by its + * associated backend. Backends are selected based on kernel packages + * passed via compilation arguments ( see @ref gapi_compile_args, + * GNetworkPackage, GKernelPackage for details). + * + * GStreamingCompiled objects have a "player" semantics -- there are + * methods like start() and stop(). GStreamingCompiled has a full + * control over a videostream and so is stateful. You need to specify the + * input stream data using setSource() and then call start() to + * actually start processing. After that, use pull() or try_pull() to + * obtain next processed data frame from the graph in a blocking or + * non-blocking way, respectively. + * + * Currently a single GStreamingCompiled can process only one video + * streat at time. Produce multiple GStreamingCompiled objects to run the + * same graph on multiple video streams. + * + * @sa GCompiled + */ +class GAPI_EXPORTS GStreamingCompiled +{ +public: + class GAPI_EXPORTS Priv; + GStreamingCompiled(); + + // FIXME: More overloads? + /** + * @brief Specify the input data to GStreamingCompiled for + * processing, a generic version. + * + * Use gin() to create an input parameter vector. + * + * Input vectors must have the same number of elements as defined + * in the cv::GComputation protocol (at the moment of its + * construction). Shapes of elements also must conform to protocol + * (e.g. cv::Mat needs to be passed where cv::GMat has been + * declared as input, and so on). Run-time exception is generated + * on type mismatch. + * + * In contrast with regular GCompiled, user can also pass an + * object of type GVideoCapture for a GMat parameter of the parent + * GComputation. The compiled pipeline will start fetching data + * from that GVideoCapture and feeding it into the + * pipeline. Pipeline stops when a GVideoCapture marks end of the + * stream (or when stop() is called). + * + * Passing a regular Mat for a GMat parameter makes it "infinite" + * source -- pipeline may run forever feeding with this Mat until + * stopped explicitly. + * + * Currently only a single GVideoCapture is supported as input. If + * the parent GComputation is declared with multiple input GMat's, + * one of those can be specified as GVideoCapture but all others + * must be regular Mat objects. + * + * Throws if pipeline is already running. Use stop() and then + * setSource() to run the graph on a new video stream. + * + * @note This method is not thread-safe (with respect to the user + * side) at the moment. Protect the access if + * start()/stop()/setSource() may be called on the same object in + * multiple threads in your application. + * + * @param ins vector of inputs to process. + * @sa gin + */ + void setSource(GRunArgs &&ins); + + /** + * @brief Specify an input video stream for a single-input + * computation pipeline. + * + * Throws if pipeline is already running. Use stop() and then + * setSource() to run the graph on a new video stream. + * + * @overload + * @param s a shared pointer to IStreamSource representing the + * input video stream. + */ + void setSource(const gapi::wip::IStreamSource::Ptr& s); + + /** + * @brief Start the pipeline execution. + * + * Use pull()/try_pull() to obtain data. Throws an exception if + * a video source was not specified. + * + * setSource() must be called first, even if the pipeline has been + * working already and then stopped (explicitly via stop() or due + * stream completion) + * + * @note This method is not thread-safe (with respect to the user + * side) at the moment. Protect the access if + * start()/stop()/setSource() may be called on the same object in + * multiple threads in your application. + */ + void start(); + + /** + * @brief Get the next processed frame from the pipeline. + * + * Use gout() to create an output parameter vector. + * + * Output vectors must have the same number of elements as defined + * in the cv::GComputation protocol (at the moment of its + * construction). Shapes of elements also must conform to protocol + * (e.g. cv::Mat needs to be passed where cv::GMat has been + * declared as output, and so on). Run-time exception is generated + * on type mismatch. + * + * This method writes new data into objects passed via output + * vector. If there is no data ready yet, this method blocks. Use + * try_pull() if you need a non-blocking version. + * + * @param outs vector of output parameters to obtain. + * @return true if next result has been obtained, + * false marks end of the stream. + */ + bool pull(cv::GRunArgsP &&outs); + + /** + * @brief Try to get the next processed frame from the pipeline. + * + * Use gout() to create an output parameter vector. + * + * This method writes new data into objects passed via output + * vector. If there is no data ready yet, the output vector + * remains unchanged and false is returned. + * + * @return true if data has been obtained, and false if it was + * not. Note: false here doesn't mark the end of the stream. + */ + bool try_pull(cv::GRunArgsP &&outs); + + /** + * @brief Stop (abort) processing the pipeline. + * + * Note - it is not pause but a complete stop. Calling start() + * will cause G-API to start processing the stream from the early beginning. + * + * Throws if the pipeline is not running. + */ + void stop(); + + /** + * @brief Test if the pipeline is running. + * + * @note This method is not thread-safe (with respect to the user + * side) at the moment. Protect the access if + * start()/stop()/setSource() may be called on the same object in + * multiple threads in your application. + * + * @return true if the current stream is not over yet. + */ + bool running() const; + + /// @private + Priv& priv(); + + /** + * @brief Check if compiled object is valid (non-empty) + * + * @return true if the object is runnable (valid), false otherwise + */ + explicit operator bool () const; + + /** + * @brief Vector of metadata this graph was compiled for. + * + * @return Unless _reshape_ is not supported, return value is the + * same vector which was passed to cv::GComputation::compile() to + * produce this compiled object. Otherwise, it is the latest + * metadata vector passed to reshape() (if that call was + * successful). + */ + const GMetaArgs& metas() const; // Meta passed to compile() + + /** + * @brief Vector of metadata descriptions of graph outputs + * + * @return vector with formats/resolutions of graph's output + * objects, auto-inferred from input metadata vector by + * operations which form this computation. + * + * @note GCompiled objects produced from the same + * cv::GComputiation graph with different input metas may return + * different values in this vector. + */ + const GMetaArgs& outMetas() const; + +protected: + /// @private + std::shared_ptr m_priv; +}; +/** @} */ + +} + +#endif // OPENCV_GAPI_GSTREAMING_COMPILED_HPP diff --git a/IPL/include/opencv/opencv2/gapi/gtransform.hpp b/IPL/include/opencv/opencv2/gapi/gtransform.hpp new file mode 100644 index 0000000..5d1b91b --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/gtransform.hpp @@ -0,0 +1,103 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2019 Intel Corporation + +#ifndef OPENCV_GAPI_GTRANSFORM_HPP +#define OPENCV_GAPI_GTRANSFORM_HPP + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace cv +{ + +struct GAPI_EXPORTS GTransform +{ + // FIXME: consider another simplified + // class instead of GComputation + using F = std::function; + + std::string description; + F pattern; + F substitute; + + GTransform(const std::string& d, const F &p, const F &s) : description(d), pattern(p), substitute(s){}; +}; + +namespace detail +{ + +template +struct TransHelper; + +template +struct TransHelper, Out> +{ + template + static GComputation invoke(Callable f, Seq, Seq) + { + const std::tuple ins; + const auto r = tuple_wrap_helper::get(f(std::get(ins)...)); + return GComputation(cv::GIn(std::get(ins)...), + cv::GOut(std::get(r)...)); + } + + static GComputation get_pattern() + { + return invoke(K::pattern, typename MkSeq::type(), + typename MkSeq::type>::value>::type()); + } + static GComputation get_substitute() + { + return invoke(K::substitute, typename MkSeq::type(), + typename MkSeq::type>::value>::type()); + } +}; +} // namespace detail + +template +class GTransformImpl; + +template +class GTransformImpl> : public cv::detail::TransHelper, R>, + public cv::detail::TransformTag +{ +public: + // FIXME: currently there is no check that transformations' signatures are unique + // and won't be any intersection in graph compilation stage + using API = K; + + static GTransform transformation() + { + return GTransform(K::descr(), &K::get_pattern, &K::get_substitute); + } +}; +} // namespace cv + +#define G_DESCR_HELPER_CLASS(Class) Class##DescrHelper + +#define G_DESCR_HELPER_BODY(Class, Descr) \ + namespace detail \ + { \ + struct G_DESCR_HELPER_CLASS(Class) \ + { \ + static constexpr const char *descr() { return Descr; }; \ + }; \ + } + +#define GAPI_TRANSFORM(Class, API, Descr) \ + G_DESCR_HELPER_BODY(Class, Descr) \ + struct Class final : public cv::GTransformImpl, \ + public detail::G_DESCR_HELPER_CLASS(Class) + +#endif // OPENCV_GAPI_GTRANSFORM_HPP diff --git a/IPL/include/opencv/opencv2/gapi/gtype_traits.hpp b/IPL/include/opencv/opencv2/gapi/gtype_traits.hpp new file mode 100644 index 0000000..fbdac15 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/gtype_traits.hpp @@ -0,0 +1,188 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_GTYPE_TRAITS_HPP +#define OPENCV_GAPI_GTYPE_TRAITS_HPP + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace cv +{ +namespace detail +{ + // FIXME: These traits and enum and possible numerous switch(kind) + // block may be replaced with a special Handler object or with + // a double dispatch + enum class ArgKind: int + { + OPAQUE_VAL, // Unknown, generic, opaque-to-GAPI data type - STATIC + // Note: OPAQUE is sometimes defined in Win sys headers +#if !defined(OPAQUE) && !defined(CV_DOXYGEN) + OPAQUE = OPAQUE_VAL, // deprecated value used for compatibility, use OPAQUE_VAL instead +#endif + GOBJREF, // reference to object + GMAT, // a cv::GMat + GMATP, // a cv::GMatP + GFRAME, // a cv::GFrame + GSCALAR, // a cv::GScalar + GARRAY, // a cv::GArrayU (note - exactly GArrayU, not GArray!) + GOPAQUE, // a cv::GOpaqueU (note - exactly GOpaqueU, not GOpaque!) + }; + + // Describe G-API types (G-types) with traits. Mostly used by + // cv::GArg to store meta information about types passed into + // operation arguments. Please note that cv::GComputation is + // defined on GProtoArgs, not GArgs! + template struct GTypeTraits; + template struct GTypeTraits + { + static constexpr const ArgKind kind = ArgKind::OPAQUE_VAL; + }; + template<> struct GTypeTraits + { + static constexpr const ArgKind kind = ArgKind::GMAT; + static constexpr const GShape shape = GShape::GMAT; + }; + template<> struct GTypeTraits + { + static constexpr const ArgKind kind = ArgKind::GMATP; + static constexpr const GShape shape = GShape::GMAT; + }; + template<> struct GTypeTraits + { + static constexpr const ArgKind kind = ArgKind::GFRAME; + static constexpr const GShape shape = GShape::GMAT; + }; + template<> struct GTypeTraits + { + static constexpr const ArgKind kind = ArgKind::GSCALAR; + static constexpr const GShape shape = GShape::GSCALAR; + }; + template struct GTypeTraits > + { + static constexpr const ArgKind kind = ArgKind::GARRAY; + static constexpr const GShape shape = GShape::GARRAY; + using host_type = std::vector; + using strip_type = cv::detail::VectorRef; + static cv::detail::GArrayU wrap_value(const cv::GArray &t) { return t.strip();} + static cv::detail::VectorRef wrap_in (const std::vector &t) { return detail::VectorRef(t); } + static cv::detail::VectorRef wrap_out ( std::vector &t) { return detail::VectorRef(t); } + }; + template struct GTypeTraits > + { + static constexpr const ArgKind kind = ArgKind::GOPAQUE; + static constexpr const GShape shape = GShape::GOPAQUE; + using host_type = T; + using strip_type = cv::detail::OpaqueRef; + static cv::detail::GOpaqueU wrap_value(const cv::GOpaque &t) { return t.strip();} + static cv::detail::OpaqueRef wrap_in (const T &t) { return detail::OpaqueRef(t); } + static cv::detail::OpaqueRef wrap_out ( T &t) { return detail::OpaqueRef(t); } + }; + + // Tests if Trait for type T requires extra marshalling ("custom wrap") or not. + // If Traits has wrap_value() defined, it does. + template struct has_custom_wrap + { + template class check; + template static std::true_type test(check::wrap_value)> *); + template static std::false_type test(...); + using type = decltype(test(nullptr)); + static const constexpr bool value = std::is_same(nullptr))>::value; + }; + + // Resolve a Host type back to its associated G-Type. + // FIXME: Probably it can be avoided + // FIXME: GMatP is not present here. + // (Actually these traits is used only to check + // if associated G-type has custom wrap functions + // and GMat behavior is correct for GMatP) + template struct GTypeOf; +#if !defined(GAPI_STANDALONE) + template<> struct GTypeOf { using type = cv::GMat; }; + template<> struct GTypeOf { using type = cv::GMat; }; +#endif // !defined(GAPI_STANDALONE) + template<> struct GTypeOf { using type = cv::GMat; }; + template<> struct GTypeOf { using type = cv::GScalar; }; + template struct GTypeOf > { using type = cv::GArray; }; + template struct GTypeOf { using type = cv::GOpaque;}; + // FIXME: This is not quite correct since IStreamSource may produce not only Mat but also Scalar + // and vector data. TODO: Extend the type dispatching on these types too. + template<> struct GTypeOf { using type = cv::GMat;}; + template using g_type_of_t = typename GTypeOf::type; + + // Marshalling helper for G-types and its Host types. Helps G-API + // to store G types in internal generic containers for further + // processing. Implements the following callbacks: + // + // * wrap() - converts user-facing G-type into an internal one + // for internal storage. + // Used when G-API operation is instantiated (G::on(), + // etc) during expressing a pipeline. Mostly returns input + // value "as is" except the case when G-type is a template. For + // template G-classes, calls custom wrap() from Traits. + // The value returned by wrap() is then wrapped into GArg() and + // stored in G-API metadata. + // + // Example: + // - cv::GMat arguments are passed as-is. + // - integers, pointers, STL containers, user types are passed as-is. + // - cv::GArray is converted to cv::GArrayU. + // + // * wrap_in() / wrap_out() - convert Host type associated with + // G-type to internal representation type. + // + // - For "simple" (non-template) G-types, returns value as-is. + // Example: cv::GMat has host type cv::Mat, when user passes a + // cv::Mat, system stores it internally as cv::Mat. + // + // - For "complex" (template) G-types, utilizes custom + // wrap_in()/wrap_out() as described in Traits. + // Example: cv::GArray has host type std::vector, when + // user passes a std::vector, system stores it + // internally as VectorRef (with stripped away). + template struct WrapValue + { + static auto wrap(const T& t) -> + typename std::remove_reference::type + { + return static_cast::type>(t); + } + + template static U wrap_in (const U &u) { return u; } + template static U* wrap_out(U &u) { return &u; } + }; + template struct WrapValue::value>::type> + { + static auto wrap(const T& t) -> decltype(GTypeTraits::wrap_value(t)) + { + return GTypeTraits::wrap_value(t); + } + template static auto wrap_in (const U &u) -> typename GTypeTraits::strip_type + { + return GTypeTraits::wrap_in(u); + } + template static auto wrap_out(U &u) -> typename GTypeTraits::strip_type + { + return GTypeTraits::wrap_out(u); + } + }; + + template using wrap_gapi_helper = WrapValue::type>; + template using wrap_host_helper = WrapValue >::type>; +} // namespace detail +} // namespace cv + +#endif // OPENCV_GAPI_GTYPE_TRAITS_HPP diff --git a/IPL/include/opencv/opencv2/gapi/gtyped.hpp b/IPL/include/opencv/opencv2/gapi/gtyped.hpp new file mode 100644 index 0000000..1ce6201 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/gtyped.hpp @@ -0,0 +1,229 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_GTYPED_HPP +#define OPENCV_GAPI_GTYPED_HPP +#if !defined(GAPI_STANDALONE) + +#include + +#include +#include +#include +#include + +namespace cv { + +namespace detail +{ + // FIXME: How to prevent coolhackers from extending it by their own types? + // FIXME: ...Should we care? + template struct ProtoToParam; + template<> struct ProtoToParam { using type = cv::Mat; }; + template<> struct ProtoToParam { using type = cv::Scalar; }; + template struct ProtoToParam > { using type = std::vector; }; + template struct ProtoToParam > { using type = U; }; + template using ProtoToParamT = typename ProtoToParam::type; + + template struct ProtoToMeta; + template<> struct ProtoToMeta { using type = cv::GMatDesc; }; + template<> struct ProtoToMeta { using type = cv::GScalarDesc; }; + template struct ProtoToMeta > { using type = cv::GArrayDesc; }; + template struct ProtoToMeta > { using type = cv::GOpaqueDesc; }; + template using ProtoToMetaT = typename ProtoToMeta::type; + + //workaround for MSVC 19.0 bug + template + auto make_default()->decltype(T{}) {return {};} +}; // detail + +/** + * @brief This class is a typed wrapper over a regular GComputation. + * + * `std::function<>`-like template parameter specifies the graph + * signature so methods so the object's constructor, methods like + * `apply()` and the derived `GCompiledT::operator()` also become + * typed. + * + * There is no need to use cv::gin() or cv::gout() modifiers with + * objects of this class. Instead, all input arguments are followed + * by all output arguments in the order from the template argument + * signature. + * + * Refer to the following example. Regular (untyped) code is written this way: + * + * @snippet modules/gapi/samples/api_ref_snippets.cpp Untyped_Example + * + * Here: + * + * - cv::GComputation object is created with a lambda constructor + * where it is defined as a two-input, one-output graph. + * + * - Its method `apply()` in fact takes arbitrary number of arguments + * (as vectors) so user can pass wrong number of inputs/outputs + * here. C++ compiler wouldn't notice that since the cv::GComputation + * API is polymorphic, and only a run-time error will be generated. + * + * Now the same code written with typed API: + * + * @snippet modules/gapi/samples/api_ref_snippets.cpp Typed_Example + * + * The key difference is: + * + * - Now the constructor lambda *must take* parameters and *must + * return* values as defined in the `GComputationT<>` signature. + * - Its method `apply()` does not require any extra specifiers to + * separate input arguments from the output ones + * - A `GCompiledT` (compilation product) takes input/output + * arguments with no extra specifiers as well. + */ +template class GComputationT; + +// Single return value implementation +template class GComputationT +{ +public: + typedef std::function Gen; + + class GCompiledT + { + private: + friend class GComputationT; + + cv::GCompiled m_comp; + + explicit GCompiledT(const cv::GCompiled &comp) : m_comp(comp) {} + + public: + GCompiledT() {} + + void operator()(detail::ProtoToParamT... inArgs, + detail::ProtoToParamT &outArg) + { + m_comp(cv::gin(inArgs...), cv::gout(outArg)); + } + + explicit operator bool() const + { + return static_cast(m_comp); + } + }; + +private: + typedef std::pair Captured; + + Captured capture(const Gen& g, Args... args) + { + return Captured(g(args...), cv::GIn(args...)); + } + + Captured m_capture; + cv::GComputation m_comp; + +public: + GComputationT(const Gen &generator) + : m_capture(capture(generator, detail::make_default()...)) + , m_comp(cv::GProtoInputArgs(std::move(m_capture.second)), + cv::GOut(m_capture.first)) + { + } + + void apply(detail::ProtoToParamT... inArgs, + detail::ProtoToParamT &outArg) + { + m_comp.apply(cv::gin(inArgs...), cv::gout(outArg)); + } + + GCompiledT compile(detail::ProtoToMetaT... inDescs) + { + GMetaArgs inMetas = { GMetaArg(inDescs)... }; + return GCompiledT(m_comp.compile(std::move(inMetas), GCompileArgs())); + } + + GCompiledT compile(detail::ProtoToMetaT... inDescs, GCompileArgs &&args) + { + GMetaArgs inMetas = { GMetaArg(inDescs)... }; + return GCompiledT(m_comp.compile(std::move(inMetas), std::move(args))); + } +}; + +// Multiple (fixed) return value implementation. FIXME: How to avoid copy-paste? +template class GComputationT(Args...)> +{ +public: + typedef std::function(Args...)> Gen; + + class GCompiledT + { + private: + friend class GComputationT(Args...)>; + + cv::GCompiled m_comp; + explicit GCompiledT(const cv::GCompiled &comp) : m_comp(comp) {} + + public: + GCompiledT() {} + + void operator()(detail::ProtoToParamT... inArgs, + detail::ProtoToParamT&... outArgs) + { + m_comp(cv::gin(inArgs...), cv::gout(outArgs...)); + } + + explicit operator bool() const + { + return static_cast(m_comp); + } + }; + +private: + typedef std::pair Captured; + + template + Captured capture(GProtoArgs &&args, const std::tuple &rr, detail::Seq) + { + return Captured(cv::GOut(std::get(rr)...).m_args, args); + } + + Captured capture(const Gen& g, Args... args) + { + return capture(cv::GIn(args...).m_args, g(args...), typename detail::MkSeq::type()); + } + + Captured m_capture; + cv::GComputation m_comp; + +public: + GComputationT(const Gen &generator) + : m_capture(capture(generator, detail::make_default()...)) + , m_comp(cv::GProtoInputArgs(std::move(m_capture.second)), + cv::GProtoOutputArgs(std::move(m_capture.first))) + { + } + + void apply(detail::ProtoToParamT... inArgs, + detail::ProtoToParamT&... outArgs) + { + m_comp.apply(cv::gin(inArgs...), cv::gout(outArgs...)); + } + + GCompiledT compile(detail::ProtoToMetaT... inDescs) + { + GMetaArgs inMetas = { GMetaArg(inDescs)... }; + return GCompiledT(m_comp.compile(std::move(inMetas), GCompileArgs())); + } + + GCompiledT compile(detail::ProtoToMetaT... inDescs, GCompileArgs &&args) + { + GMetaArgs inMetas = { GMetaArg(inDescs)... }; + return GCompiledT(m_comp.compile(std::move(inMetas), std::move(args))); + } +}; + +} // namespace cv +#endif // !defined(GAPI_STANDALONE) +#endif // OPENCV_GAPI_GTYPED_HPP diff --git a/IPL/include/opencv/opencv2/gapi/imgproc.hpp b/IPL/include/opencv/opencv2/gapi/imgproc.hpp new file mode 100644 index 0000000..4faf5e1 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/imgproc.hpp @@ -0,0 +1,1009 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018-2020 Intel Corporation + + +#ifndef OPENCV_GAPI_IMGPROC_HPP +#define OPENCV_GAPI_IMGPROC_HPP + +#include + +#include // std::tuple + +#include +#include +#include + + +/** \defgroup gapi_imgproc G-API Image processing functionality +@{ + @defgroup gapi_filters Graph API: Image filters + @defgroup gapi_colorconvert Graph API: Converting image from one color space to another +@} + */ + +namespace cv { namespace gapi { + +namespace imgproc { + using GMat2 = std::tuple; + using GMat3 = std::tuple; // FIXME: how to avoid this? + + G_TYPED_KERNEL(GFilter2D, ,"org.opencv.imgproc.filters.filter2D") { + static GMatDesc outMeta(GMatDesc in, int ddepth, Mat, Point, Scalar, int, Scalar) { + return in.withDepth(ddepth); + } + }; + + G_TYPED_KERNEL(GSepFilter, , "org.opencv.imgproc.filters.sepfilter") { + static GMatDesc outMeta(GMatDesc in, int ddepth, Mat, Mat, Point, Scalar, int, Scalar) { + return in.withDepth(ddepth); + } + }; + + G_TYPED_KERNEL(GBoxFilter, , "org.opencv.imgproc.filters.boxfilter") { + static GMatDesc outMeta(GMatDesc in, int ddepth, Size, Point, bool, int, Scalar) { + return in.withDepth(ddepth); + } + }; + + G_TYPED_KERNEL(GBlur, , "org.opencv.imgproc.filters.blur"){ + static GMatDesc outMeta(GMatDesc in, Size, Point, int, Scalar) { + return in; + } + }; + + G_TYPED_KERNEL(GGaussBlur, , "org.opencv.imgproc.filters.gaussianBlur") { + static GMatDesc outMeta(GMatDesc in, Size, double, double, int, Scalar) { + return in; + } + }; + + G_TYPED_KERNEL(GMedianBlur, , "org.opencv.imgproc.filters.medianBlur") { + static GMatDesc outMeta(GMatDesc in, int) { + return in; + } + }; + + G_TYPED_KERNEL(GErode, , "org.opencv.imgproc.filters.erode") { + static GMatDesc outMeta(GMatDesc in, Mat, Point, int, int, Scalar) { + return in; + } + }; + + G_TYPED_KERNEL(GDilate, , "org.opencv.imgproc.filters.dilate") { + static GMatDesc outMeta(GMatDesc in, Mat, Point, int, int, Scalar) { + return in; + } + }; + + G_TYPED_KERNEL(GSobel, , "org.opencv.imgproc.filters.sobel") { + static GMatDesc outMeta(GMatDesc in, int ddepth, int, int, int, double, double, int, Scalar) { + return in.withDepth(ddepth); + } + }; + + G_TYPED_KERNEL_M(GSobelXY, , "org.opencv.imgproc.filters.sobelxy") { + static std::tuple outMeta(GMatDesc in, int ddepth, int, int, double, double, int, Scalar) { + return std::make_tuple(in.withDepth(ddepth), in.withDepth(ddepth)); + } + }; + + G_TYPED_KERNEL(GEqHist, , "org.opencv.imgproc.equalizeHist"){ + static GMatDesc outMeta(GMatDesc in) { + return in.withType(CV_8U, 1); + } + }; + + G_TYPED_KERNEL(GCanny, , "org.opencv.imgproc.canny"){ + static GMatDesc outMeta(GMatDesc in, double, double, int, bool) { + return in.withType(CV_8U, 1); + } + }; + + G_TYPED_KERNEL(GGoodFeatures, + (GMat,int,double,double,Mat,int,bool,double)>, + "org.opencv.imgproc.goodFeaturesToTrack") { + static GArrayDesc outMeta(GMatDesc, int, double, double, const Mat&, int, bool, double) { + return empty_array_desc(); + } + }; + + G_TYPED_KERNEL(GRGB2YUV, , "org.opencv.imgproc.colorconvert.rgb2yuv") { + static GMatDesc outMeta(GMatDesc in) { + return in; // type still remains CV_8UC3; + } + }; + + G_TYPED_KERNEL(GYUV2RGB, , "org.opencv.imgproc.colorconvert.yuv2rgb") { + static GMatDesc outMeta(GMatDesc in) { + return in; // type still remains CV_8UC3; + } + }; + + G_TYPED_KERNEL(GNV12toRGB, , "org.opencv.imgproc.colorconvert.nv12torgb") { + static GMatDesc outMeta(GMatDesc in_y, GMatDesc in_uv) { + GAPI_Assert(in_y.chan == 1); + GAPI_Assert(in_uv.chan == 2); + GAPI_Assert(in_y.depth == CV_8U); + GAPI_Assert(in_uv.depth == CV_8U); + // UV size should be aligned with Y + GAPI_Assert(in_y.size.width == 2 * in_uv.size.width); + GAPI_Assert(in_y.size.height == 2 * in_uv.size.height); + return in_y.withType(CV_8U, 3); // type will be CV_8UC3; + } + }; + + G_TYPED_KERNEL(GNV12toBGR, , "org.opencv.imgproc.colorconvert.nv12tobgr") { + static GMatDesc outMeta(GMatDesc in_y, GMatDesc in_uv) { + GAPI_Assert(in_y.chan == 1); + GAPI_Assert(in_uv.chan == 2); + GAPI_Assert(in_y.depth == CV_8U); + GAPI_Assert(in_uv.depth == CV_8U); + // UV size should be aligned with Y + GAPI_Assert(in_y.size.width == 2 * in_uv.size.width); + GAPI_Assert(in_y.size.height == 2 * in_uv.size.height); + return in_y.withType(CV_8U, 3); // type will be CV_8UC3; + } + }; + + G_TYPED_KERNEL(GRGB2Lab, , "org.opencv.imgproc.colorconvert.rgb2lab") { + static GMatDesc outMeta(GMatDesc in) { + return in; // type still remains CV_8UC3; + } + }; + + G_TYPED_KERNEL(GBGR2LUV, , "org.opencv.imgproc.colorconvert.bgr2luv") { + static GMatDesc outMeta(GMatDesc in) { + return in; // type still remains CV_8UC3; + } + }; + + G_TYPED_KERNEL(GLUV2BGR, , "org.opencv.imgproc.colorconvert.luv2bgr") { + static GMatDesc outMeta(GMatDesc in) { + return in; // type still remains CV_8UC3; + } + }; + + G_TYPED_KERNEL(GYUV2BGR, , "org.opencv.imgproc.colorconvert.yuv2bgr") { + static GMatDesc outMeta(GMatDesc in) { + return in; // type still remains CV_8UC3; + } + }; + + G_TYPED_KERNEL(GBGR2YUV, , "org.opencv.imgproc.colorconvert.bgr2yuv") { + static GMatDesc outMeta(GMatDesc in) { + return in; // type still remains CV_8UC3; + } + }; + + G_TYPED_KERNEL(GRGB2Gray, , "org.opencv.imgproc.colorconvert.rgb2gray") { + static GMatDesc outMeta(GMatDesc in) { + return in.withType(CV_8U, 1); + } + }; + + G_TYPED_KERNEL(GRGB2GrayCustom, , "org.opencv.imgproc.colorconvert.rgb2graycustom") { + static GMatDesc outMeta(GMatDesc in, float, float, float) { + return in.withType(CV_8U, 1); + } + }; + + G_TYPED_KERNEL(GBGR2Gray, , "org.opencv.imgproc.colorconvert.bgr2gray") { + static GMatDesc outMeta(GMatDesc in) { + return in.withType(CV_8U, 1); + } + }; + + G_TYPED_KERNEL(GBayerGR2RGB, , "org.opencv.imgproc.colorconvert.bayergr2rgb") { + static cv::GMatDesc outMeta(cv::GMatDesc in) { + return in.withType(CV_8U, 3); + } + }; + + G_TYPED_KERNEL(GRGB2HSV, , "org.opencv.imgproc.colorconvert.rgb2hsv") { + static cv::GMatDesc outMeta(cv::GMatDesc in) { + return in; + } + }; + + G_TYPED_KERNEL(GRGB2YUV422, , "org.opencv.imgproc.colorconvert.rgb2yuv422") { + static cv::GMatDesc outMeta(cv::GMatDesc in) { + GAPI_Assert(in.depth == CV_8U); + GAPI_Assert(in.chan == 3); + return in.withType(in.depth, 2); + } + }; + + G_TYPED_KERNEL(GNV12toRGBp, , "org.opencv.colorconvert.imgproc.nv12torgbp") { + static GMatDesc outMeta(GMatDesc inY, GMatDesc inUV) { + GAPI_Assert(inY.depth == CV_8U); + GAPI_Assert(inUV.depth == CV_8U); + GAPI_Assert(inY.chan == 1); + GAPI_Assert(inY.planar == false); + GAPI_Assert(inUV.chan == 2); + GAPI_Assert(inUV.planar == false); + GAPI_Assert(inY.size.width == 2 * inUV.size.width); + GAPI_Assert(inY.size.height == 2 * inUV.size.height); + return inY.withType(CV_8U, 3).asPlanar(); + } + }; + + G_TYPED_KERNEL(GNV12toGray, , "org.opencv.colorconvert.imgproc.nv12togray") { + static GMatDesc outMeta(GMatDesc inY, GMatDesc inUV) { + GAPI_Assert(inY.depth == CV_8U); + GAPI_Assert(inUV.depth == CV_8U); + GAPI_Assert(inY.chan == 1); + GAPI_Assert(inY.planar == false); + GAPI_Assert(inUV.chan == 2); + GAPI_Assert(inUV.planar == false); + + GAPI_Assert(inY.size.width == 2 * inUV.size.width); + GAPI_Assert(inY.size.height == 2 * inUV.size.height); + return inY.withType(CV_8U, 1); + } + }; + + G_TYPED_KERNEL(GNV12toBGRp, , "org.opencv.colorconvert.imgproc.nv12tobgrp") { + static GMatDesc outMeta(GMatDesc inY, GMatDesc inUV) { + GAPI_Assert(inY.depth == CV_8U); + GAPI_Assert(inUV.depth == CV_8U); + GAPI_Assert(inY.chan == 1); + GAPI_Assert(inY.planar == false); + GAPI_Assert(inUV.chan == 2); + GAPI_Assert(inUV.planar == false); + GAPI_Assert(inY.size.width == 2 * inUV.size.width); + GAPI_Assert(inY.size.height == 2 * inUV.size.height); + return inY.withType(CV_8U, 3).asPlanar(); + } + }; + +} //namespace imgproc + +//! @addtogroup gapi_filters +//! @{ +/** @brief Applies a separable linear filter to a matrix(image). + +The function applies a separable linear filter to the matrix. That is, first, every row of src is +filtered with the 1D kernel kernelX. Then, every column of the result is filtered with the 1D +kernel kernelY. The final result is returned. + +Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. +Output image must have the same type, size, and number of channels as the input image. +@note In case of floating-point computation, rounding to nearest even is procedeed +if hardware supports it (if not - to nearest value). + +@note Function textual ID is "org.opencv.imgproc.filters.sepfilter" +@param src Source image. +@param ddepth desired depth of the destination image (the following combinations of src.depth() and ddepth are supported: + + src.depth() = CV_8U, ddepth = -1/CV_16S/CV_32F/CV_64F + src.depth() = CV_16U/CV_16S, ddepth = -1/CV_32F/CV_64F + src.depth() = CV_32F, ddepth = -1/CV_32F/CV_64F + src.depth() = CV_64F, ddepth = -1/CV_64F + +when ddepth=-1, the output image will have the same depth as the source) +@param kernelX Coefficients for filtering each row. +@param kernelY Coefficients for filtering each column. +@param anchor Anchor position within the kernel. The default value \f$(-1,-1)\f$ means that the anchor +is at the kernel center. +@param delta Value added to the filtered results before storing them. +@param borderType Pixel extrapolation method, see cv::BorderTypes +@param borderValue border value in case of constant border type +@sa boxFilter, gaussianBlur, medianBlur + */ +GAPI_EXPORTS GMat sepFilter(const GMat& src, int ddepth, const Mat& kernelX, const Mat& kernelY, const Point& anchor /*FIXME: = Point(-1,-1)*/, + const Scalar& delta /*FIXME = GScalar(0)*/, int borderType = BORDER_DEFAULT, + const Scalar& borderValue = Scalar(0)); + +/** @brief Convolves an image with the kernel. + +The function applies an arbitrary linear filter to an image. When +the aperture is partially outside the image, the function interpolates outlier pixel values +according to the specified border mode. + +The function does actually compute correlation, not the convolution: + +\f[\texttt{dst} (x,y) = \sum _{ \stackrel{0\leq x' < \texttt{kernel.cols},}{0\leq y' < \texttt{kernel.rows}} } \texttt{kernel} (x',y')* \texttt{src} (x+x'- \texttt{anchor.x} ,y+y'- \texttt{anchor.y} )\f] + +That is, the kernel is not mirrored around the anchor point. If you need a real convolution, flip +the kernel using flip and set the new anchor to `(kernel.cols - anchor.x - 1, kernel.rows - +anchor.y - 1)`. + +Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. +Output image must have the same size and number of channels an input image. +@note Rounding to nearest even is procedeed if hardware supports it, if not - to nearest. + +@note Function textual ID is "org.opencv.imgproc.filters.filter2D" + +@param src input image. +@param ddepth desired depth of the destination image +@param kernel convolution kernel (or rather a correlation kernel), a single-channel floating point +matrix; if you want to apply different kernels to different channels, split the image into +separate color planes using split and process them individually. +@param anchor anchor of the kernel that indicates the relative position of a filtered point within +the kernel; the anchor should lie within the kernel; default value (-1,-1) means that the anchor +is at the kernel center. +@param delta optional value added to the filtered pixels before storing them in dst. +@param borderType pixel extrapolation method, see cv::BorderTypes +@param borderValue border value in case of constant border type +@sa sepFilter + */ +GAPI_EXPORTS GMat filter2D(const GMat& src, int ddepth, const Mat& kernel, const Point& anchor = Point(-1,-1), const Scalar& delta = Scalar(0), + int borderType = BORDER_DEFAULT, const Scalar& borderValue = Scalar(0)); + + +/** @brief Blurs an image using the box filter. + +The function smooths an image using the kernel: + +\f[\texttt{K} = \alpha \begin{bmatrix} 1 & 1 & 1 & \cdots & 1 & 1 \\ 1 & 1 & 1 & \cdots & 1 & 1 \\ \hdotsfor{6} \\ 1 & 1 & 1 & \cdots & 1 & 1 \end{bmatrix}\f] + +where + +\f[\alpha = \fork{\frac{1}{\texttt{ksize.width*ksize.height}}}{when \texttt{normalize=true}}{1}{otherwise}\f] + +Unnormalized box filter is useful for computing various integral characteristics over each pixel +neighborhood, such as covariance matrices of image derivatives (used in dense optical flow +algorithms, and so on). If you need to compute pixel sums over variable-size windows, use cv::integral. + +Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. +Output image must have the same type, size, and number of channels as the input image. +@note Rounding to nearest even is procedeed if hardware supports it, if not - to nearest. + +@note Function textual ID is "org.opencv.imgproc.filters.boxfilter" + +@param src Source image. +@param dtype the output image depth (-1 to set the input image data type). +@param ksize blurring kernel size. +@param anchor Anchor position within the kernel. The default value \f$(-1,-1)\f$ means that the anchor +is at the kernel center. +@param normalize flag, specifying whether the kernel is normalized by its area or not. +@param borderType Pixel extrapolation method, see cv::BorderTypes +@param borderValue border value in case of constant border type +@sa sepFilter, gaussianBlur, medianBlur, integral + */ +GAPI_EXPORTS GMat boxFilter(const GMat& src, int dtype, const Size& ksize, const Point& anchor = Point(-1,-1), + bool normalize = true, int borderType = BORDER_DEFAULT, + const Scalar& borderValue = Scalar(0)); + +/** @brief Blurs an image using the normalized box filter. + +The function smooths an image using the kernel: + +\f[\texttt{K} = \frac{1}{\texttt{ksize.width*ksize.height}} \begin{bmatrix} 1 & 1 & 1 & \cdots & 1 & 1 \\ 1 & 1 & 1 & \cdots & 1 & 1 \\ \hdotsfor{6} \\ 1 & 1 & 1 & \cdots & 1 & 1 \\ \end{bmatrix}\f] + +The call `blur(src, dst, ksize, anchor, borderType)` is equivalent to `boxFilter(src, dst, src.type(), +anchor, true, borderType)`. + +Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. +Output image must have the same type, size, and number of channels as the input image. +@note Rounding to nearest even is procedeed if hardware supports it, if not - to nearest. + +@note Function textual ID is "org.opencv.imgproc.filters.blur" + +@param src Source image. +@param ksize blurring kernel size. +@param anchor anchor point; default value Point(-1,-1) means that the anchor is at the kernel +center. +@param borderType border mode used to extrapolate pixels outside of the image, see cv::BorderTypes +@param borderValue border value in case of constant border type +@sa boxFilter, bilateralFilter, GaussianBlur, medianBlur + */ +GAPI_EXPORTS GMat blur(const GMat& src, const Size& ksize, const Point& anchor = Point(-1,-1), + int borderType = BORDER_DEFAULT, const Scalar& borderValue = Scalar(0)); + + +//GAPI_EXPORTS_W void blur( InputArray src, OutputArray dst, + // Size ksize, Point anchor = Point(-1,-1), + // int borderType = BORDER_DEFAULT ); + + +/** @brief Blurs an image using a Gaussian filter. + +The function filter2Ds the source image with the specified Gaussian kernel. +Output image must have the same type and number of channels an input image. + +Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1. +Output image must have the same type, size, and number of channels as the input image. +@note Rounding to nearest even is procedeed if hardware supports it, if not - to nearest. + +@note Function textual ID is "org.opencv.imgproc.filters.gaussianBlur" + +@param src input image; +@param ksize Gaussian kernel size. ksize.width and ksize.height can differ but they both must be +positive and odd. Or, they can be zero's and then they are computed from sigma. +@param sigmaX Gaussian kernel standard deviation in X direction. +@param sigmaY Gaussian kernel standard deviation in Y direction; if sigmaY is zero, it is set to be +equal to sigmaX, if both sigmas are zeros, they are computed from ksize.width and ksize.height, +respectively (see cv::getGaussianKernel for details); to fully control the result regardless of +possible future modifications of all this semantics, it is recommended to specify all of ksize, +sigmaX, and sigmaY. +@param borderType pixel extrapolation method, see cv::BorderTypes +@param borderValue border value in case of constant border type +@sa sepFilter, boxFilter, medianBlur + */ +GAPI_EXPORTS GMat gaussianBlur(const GMat& src, const Size& ksize, double sigmaX, double sigmaY = 0, + int borderType = BORDER_DEFAULT, const Scalar& borderValue = Scalar(0)); + +/** @brief Blurs an image using the median filter. + +The function smoothes an image using the median filter with the \f$\texttt{ksize} \times +\texttt{ksize}\f$ aperture. Each channel of a multi-channel image is processed independently. +Output image must have the same type, size, and number of channels as the input image. +@note Rounding to nearest even is procedeed if hardware supports it, if not - to nearest. +The median filter uses cv::BORDER_REPLICATE internally to cope with border pixels, see cv::BorderTypes + +@note Function textual ID is "org.opencv.imgproc.filters.medianBlur" + +@param src input matrix (image) +@param ksize aperture linear size; it must be odd and greater than 1, for example: 3, 5, 7 ... +@sa boxFilter, gaussianBlur + */ +GAPI_EXPORTS GMat medianBlur(const GMat& src, int ksize); + +/** @brief Erodes an image by using a specific structuring element. + +The function erodes the source image using the specified structuring element that determines the +shape of a pixel neighborhood over which the minimum is taken: + +\f[\texttt{dst} (x,y) = \min _{(x',y'): \, \texttt{element} (x',y') \ne0 } \texttt{src} (x+x',y+y')\f] + +Erosion can be applied several (iterations) times. In case of multi-channel images, each channel is processed independently. +Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, and @ref CV_32FC1. +Output image must have the same type, size, and number of channels as the input image. +@note Rounding to nearest even is procedeed if hardware supports it, if not - to nearest. + +@note Function textual ID is "org.opencv.imgproc.filters.erode" + +@param src input image +@param kernel structuring element used for erosion; if `element=Mat()`, a `3 x 3` rectangular +structuring element is used. Kernel can be created using getStructuringElement. +@param anchor position of the anchor within the element; default value (-1, -1) means that the +anchor is at the element center. +@param iterations number of times erosion is applied. +@param borderType pixel extrapolation method, see cv::BorderTypes +@param borderValue border value in case of a constant border +@sa dilate + */ +GAPI_EXPORTS GMat erode(const GMat& src, const Mat& kernel, const Point& anchor = Point(-1,-1), int iterations = 1, + int borderType = BORDER_CONSTANT, + const Scalar& borderValue = morphologyDefaultBorderValue()); + +/** @brief Erodes an image by using 3 by 3 rectangular structuring element. + +The function erodes the source image using the rectangular structuring element with rectangle center as an anchor. +Erosion can be applied several (iterations) times. In case of multi-channel images, each channel is processed independently. +Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, and @ref CV_32FC1. +Output image must have the same type, size, and number of channels as the input image. +@note Rounding to nearest even is procedeed if hardware supports it, if not - to nearest. + +@param src input image +@param iterations number of times erosion is applied. +@param borderType pixel extrapolation method, see cv::BorderTypes +@param borderValue border value in case of a constant border +@sa erode, dilate3x3 + */ +GAPI_EXPORTS GMat erode3x3(const GMat& src, int iterations = 1, + int borderType = BORDER_CONSTANT, + const Scalar& borderValue = morphologyDefaultBorderValue()); + +/** @brief Dilates an image by using a specific structuring element. + +The function dilates the source image using the specified structuring element that determines the +shape of a pixel neighborhood over which the maximum is taken: +\f[\texttt{dst} (x,y) = \max _{(x',y'): \, \texttt{element} (x',y') \ne0 } \texttt{src} (x+x',y+y')\f] + +Dilation can be applied several (iterations) times. In case of multi-channel images, each channel is processed independently. +Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, and @ref CV_32FC1. +Output image must have the same type, size, and number of channels as the input image. +@note Rounding to nearest even is procedeed if hardware supports it, if not - to nearest. + +@note Function textual ID is "org.opencv.imgproc.filters.dilate" + +@param src input image. +@param kernel structuring element used for dilation; if elemenat=Mat(), a 3 x 3 rectangular +structuring element is used. Kernel can be created using getStructuringElement +@param anchor position of the anchor within the element; default value (-1, -1) means that the +anchor is at the element center. +@param iterations number of times dilation is applied. +@param borderType pixel extrapolation method, see cv::BorderTypes +@param borderValue border value in case of a constant border +@sa erode, morphologyEx, getStructuringElement + */ +GAPI_EXPORTS GMat dilate(const GMat& src, const Mat& kernel, const Point& anchor = Point(-1,-1), int iterations = 1, + int borderType = BORDER_CONSTANT, + const Scalar& borderValue = morphologyDefaultBorderValue()); + +/** @brief Dilates an image by using 3 by 3 rectangular structuring element. + +The function dilates the source image using the specified structuring element that determines the +shape of a pixel neighborhood over which the maximum is taken: +\f[\texttt{dst} (x,y) = \max _{(x',y'): \, \texttt{element} (x',y') \ne0 } \texttt{src} (x+x',y+y')\f] + +Dilation can be applied several (iterations) times. In case of multi-channel images, each channel is processed independently. +Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, and @ref CV_32FC1. +Output image must have the same type, size, and number of channels as the input image. +@note Rounding to nearest even is procedeed if hardware supports it, if not - to nearest. + +@note Function textual ID is "org.opencv.imgproc.filters.dilate" + +@param src input image. +@param iterations number of times dilation is applied. +@param borderType pixel extrapolation method, see cv::BorderTypes +@param borderValue border value in case of a constant border +@sa dilate, erode3x3 + */ + +GAPI_EXPORTS GMat dilate3x3(const GMat& src, int iterations = 1, + int borderType = BORDER_CONSTANT, + const Scalar& borderValue = morphologyDefaultBorderValue()); + +/** @brief Calculates the first, second, third, or mixed image derivatives using an extended Sobel operator. + +In all cases except one, the \f$\texttt{ksize} \times \texttt{ksize}\f$ separable kernel is used to +calculate the derivative. When \f$\texttt{ksize = 1}\f$, the \f$3 \times 1\f$ or \f$1 \times 3\f$ +kernel is used (that is, no Gaussian smoothing is done). `ksize = 1` can only be used for the first +or the second x- or y- derivatives. + +There is also the special value `ksize = FILTER_SCHARR (-1)` that corresponds to the \f$3\times3\f$ Scharr +filter that may give more accurate results than the \f$3\times3\f$ Sobel. The Scharr aperture is + +\f[\vecthreethree{-3}{0}{3}{-10}{0}{10}{-3}{0}{3}\f] + +for the x-derivative, or transposed for the y-derivative. + +The function calculates an image derivative by convolving the image with the appropriate kernel: + +\f[\texttt{dst} = \frac{\partial^{xorder+yorder} \texttt{src}}{\partial x^{xorder} \partial y^{yorder}}\f] + +The Sobel operators combine Gaussian smoothing and differentiation, so the result is more or less +resistant to the noise. Most often, the function is called with ( xorder = 1, yorder = 0, ksize = 3) +or ( xorder = 0, yorder = 1, ksize = 3) to calculate the first x- or y- image derivative. The first +case corresponds to a kernel of: + +\f[\vecthreethree{-1}{0}{1}{-2}{0}{2}{-1}{0}{1}\f] + +The second case corresponds to a kernel of: + +\f[\vecthreethree{-1}{-2}{-1}{0}{0}{0}{1}{2}{1}\f] + +@note Rounding to nearest even is procedeed if hardware supports it, if not - to nearest. + +@note Function textual ID is "org.opencv.imgproc.filters.sobel" + +@param src input image. +@param ddepth output image depth, see @ref filter_depths "combinations"; in the case of + 8-bit input images it will result in truncated derivatives. +@param dx order of the derivative x. +@param dy order of the derivative y. +@param ksize size of the extended Sobel kernel; it must be odd. +@param scale optional scale factor for the computed derivative values; by default, no scaling is +applied (see cv::getDerivKernels for details). +@param delta optional delta value that is added to the results prior to storing them in dst. +@param borderType pixel extrapolation method, see cv::BorderTypes +@param borderValue border value in case of constant border type +@sa filter2D, gaussianBlur, cartToPolar + */ +GAPI_EXPORTS GMat Sobel(const GMat& src, int ddepth, int dx, int dy, int ksize = 3, + double scale = 1, double delta = 0, + int borderType = BORDER_DEFAULT, + const Scalar& borderValue = Scalar(0)); + +/** @brief Calculates the first, second, third, or mixed image derivatives using an extended Sobel operator. + +In all cases except one, the \f$\texttt{ksize} \times \texttt{ksize}\f$ separable kernel is used to +calculate the derivative. When \f$\texttt{ksize = 1}\f$, the \f$3 \times 1\f$ or \f$1 \times 3\f$ +kernel is used (that is, no Gaussian smoothing is done). `ksize = 1` can only be used for the first +or the second x- or y- derivatives. + +There is also the special value `ksize = FILTER_SCHARR (-1)` that corresponds to the \f$3\times3\f$ Scharr +filter that may give more accurate results than the \f$3\times3\f$ Sobel. The Scharr aperture is + +\f[\vecthreethree{-3}{0}{3}{-10}{0}{10}{-3}{0}{3}\f] + +for the x-derivative, or transposed for the y-derivative. + +The function calculates an image derivative by convolving the image with the appropriate kernel: + +\f[\texttt{dst} = \frac{\partial^{xorder+yorder} \texttt{src}}{\partial x^{xorder} \partial y^{yorder}}\f] + +The Sobel operators combine Gaussian smoothing and differentiation, so the result is more or less +resistant to the noise. Most often, the function is called with ( xorder = 1, yorder = 0, ksize = 3) +or ( xorder = 0, yorder = 1, ksize = 3) to calculate the first x- or y- image derivative. The first +case corresponds to a kernel of: + +\f[\vecthreethree{-1}{0}{1}{-2}{0}{2}{-1}{0}{1}\f] + +The second case corresponds to a kernel of: + +\f[\vecthreethree{-1}{-2}{-1}{0}{0}{0}{1}{2}{1}\f] + +@note First returned matrix correspons to dx derivative while the second one to dy. + +@note Rounding to nearest even is procedeed if hardware supports it, if not - to nearest. + +@note Function textual ID is "org.opencv.imgproc.filters.sobelxy" + +@param src input image. +@param ddepth output image depth, see @ref filter_depths "combinations"; in the case of + 8-bit input images it will result in truncated derivatives. +@param order order of the derivatives. +@param ksize size of the extended Sobel kernel; it must be odd. +@param scale optional scale factor for the computed derivative values; by default, no scaling is +applied (see cv::getDerivKernels for details). +@param delta optional delta value that is added to the results prior to storing them in dst. +@param borderType pixel extrapolation method, see cv::BorderTypes +@param borderValue border value in case of constant border type +@sa filter2D, gaussianBlur, cartToPolar + */ +GAPI_EXPORTS std::tuple SobelXY(const GMat& src, int ddepth, int order, int ksize = 3, + double scale = 1, double delta = 0, + int borderType = BORDER_DEFAULT, + const Scalar& borderValue = Scalar(0)); + +/** @brief Finds edges in an image using the Canny algorithm. + +The function finds edges in the input image and marks them in the output map edges using the +Canny algorithm. The smallest value between threshold1 and threshold2 is used for edge linking. The +largest value is used to find initial segments of strong edges. See + + +@note Function textual ID is "org.opencv.imgproc.filters.canny" + +@param image 8-bit input image. +@param threshold1 first threshold for the hysteresis procedure. +@param threshold2 second threshold for the hysteresis procedure. +@param apertureSize aperture size for the Sobel operator. +@param L2gradient a flag, indicating whether a more accurate \f$L_2\f$ norm +\f$=\sqrt{(dI/dx)^2 + (dI/dy)^2}\f$ should be used to calculate the image gradient magnitude ( +L2gradient=true ), or whether the default \f$L_1\f$ norm \f$=|dI/dx|+|dI/dy|\f$ is enough ( +L2gradient=false ). + */ +GAPI_EXPORTS GMat Canny(const GMat& image, double threshold1, double threshold2, + int apertureSize = 3, bool L2gradient = false); + +/** @brief Determines strong corners on an image. + +The function finds the most prominent corners in the image or in the specified image region, as +described in @cite Shi94 + +- Function calculates the corner quality measure at every source image pixel using the + #cornerMinEigenVal or #cornerHarris . +- Function performs a non-maximum suppression (the local maximums in *3 x 3* neighborhood are + retained). +- The corners with the minimal eigenvalue less than + \f$\texttt{qualityLevel} \cdot \max_{x,y} qualityMeasureMap(x,y)\f$ are rejected. +- The remaining corners are sorted by the quality measure in the descending order. +- Function throws away each corner for which there is a stronger corner at a distance less than + maxDistance. + +The function can be used to initialize a point-based tracker of an object. + +@note If the function is called with different values A and B of the parameter qualityLevel , and +A \> B, the vector of returned corners with qualityLevel=A will be the prefix of the output vector +with qualityLevel=B . + +@note Function textual ID is "org.opencv.imgproc.goodFeaturesToTrack" + +@param image Input 8-bit or floating-point 32-bit, single-channel image. +@param maxCorners Maximum number of corners to return. If there are more corners than are found, +the strongest of them is returned. `maxCorners <= 0` implies that no limit on the maximum is set +and all detected corners are returned. +@param qualityLevel Parameter characterizing the minimal accepted quality of image corners. The +parameter value is multiplied by the best corner quality measure, which is the minimal eigenvalue +(see #cornerMinEigenVal ) or the Harris function response (see #cornerHarris ). The corners with the +quality measure less than the product are rejected. For example, if the best corner has the +quality measure = 1500, and the qualityLevel=0.01 , then all the corners with the quality measure +less than 15 are rejected. +@param minDistance Minimum possible Euclidean distance between the returned corners. +@param mask Optional region of interest. If the image is not empty (it needs to have the type +CV_8UC1 and the same size as image ), it specifies the region in which the corners are detected. +@param blockSize Size of an average block for computing a derivative covariation matrix over each +pixel neighborhood. See cornerEigenValsAndVecs . +@param useHarrisDetector Parameter indicating whether to use a Harris detector (see #cornerHarris) +or #cornerMinEigenVal. +@param k Free parameter of the Harris detector. + +@return vector of detected corners. + */ +GAPI_EXPORTS GArray goodFeaturesToTrack(const GMat &image, + int maxCorners, + double qualityLevel, + double minDistance, + const Mat &mask = Mat(), + int blockSize = 3, + bool useHarrisDetector = false, + double k = 0.04); + +/** @brief Equalizes the histogram of a grayscale image. + +The function equalizes the histogram of the input image using the following algorithm: + +- Calculate the histogram \f$H\f$ for src . +- Normalize the histogram so that the sum of histogram bins is 255. +- Compute the integral of the histogram: +\f[H'_i = \sum _{0 \le j < i} H(j)\f] +- Transform the image using \f$H'\f$ as a look-up table: \f$\texttt{dst}(x,y) = H'(\texttt{src}(x,y))\f$ + +The algorithm normalizes the brightness and increases the contrast of the image. +@note The returned image is of the same size and type as input. + +@note Function textual ID is "org.opencv.imgproc.equalizeHist" + +@param src Source 8-bit single channel image. + */ +GAPI_EXPORTS GMat equalizeHist(const GMat& src); + +//! @} gapi_filters + +//! @addtogroup gapi_colorconvert +//! @{ +/** @brief Converts an image from RGB color space to gray-scaled. +The conventional ranges for R, G, and B channel values are 0 to 255. +Resulting gray color value computed as +\f[\texttt{dst} (I)= \texttt{0.299} * \texttt{src}(I).R + \texttt{0.587} * \texttt{src}(I).G + \texttt{0.114} * \texttt{src}(I).B \f] + +@note Function textual ID is "org.opencv.imgproc.colorconvert.rgb2gray" + +@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC1. +@sa RGB2YUV + */ +GAPI_EXPORTS GMat RGB2Gray(const GMat& src); + +/** @overload +Resulting gray color value computed as +\f[\texttt{dst} (I)= \texttt{rY} * \texttt{src}(I).R + \texttt{gY} * \texttt{src}(I).G + \texttt{bY} * \texttt{src}(I).B \f] + +@note Function textual ID is "org.opencv.imgproc.colorconvert.rgb2graycustom" + +@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC1. +@param rY float multiplier for R channel. +@param gY float multiplier for G channel. +@param bY float multiplier for B channel. +@sa RGB2YUV + */ +GAPI_EXPORTS GMat RGB2Gray(const GMat& src, float rY, float gY, float bY); + +/** @brief Converts an image from BGR color space to gray-scaled. +The conventional ranges for B, G, and R channel values are 0 to 255. +Resulting gray color value computed as +\f[\texttt{dst} (I)= \texttt{0.114} * \texttt{src}(I).B + \texttt{0.587} * \texttt{src}(I).G + \texttt{0.299} * \texttt{src}(I).R \f] + +@note Function textual ID is "org.opencv.imgproc.colorconvert.bgr2gray" + +@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC1. +@sa BGR2LUV + */ +GAPI_EXPORTS GMat BGR2Gray(const GMat& src); + +/** @brief Converts an image from RGB color space to YUV color space. + +The function converts an input image from RGB color space to YUV. +The conventional ranges for R, G, and B channel values are 0 to 255. + +In case of linear transformations, the range does not matter. But in case of a non-linear +transformation, an input RGB image should be normalized to the proper value range to get the correct +results, like here, at RGB \f$\rightarrow\f$ Y\*u\*v\* transformation. +Output image must be 8-bit unsigned 3-channel image @ref CV_8UC3. + +@note Function textual ID is "org.opencv.imgproc.colorconvert.rgb2yuv" + +@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3. +@sa YUV2RGB, RGB2Lab +*/ +GAPI_EXPORTS GMat RGB2YUV(const GMat& src); + +/** @brief Converts an image from BGR color space to LUV color space. + +The function converts an input image from BGR color space to LUV. +The conventional ranges for B, G, and R channel values are 0 to 255. + +Output image must be 8-bit unsigned 3-channel image @ref CV_8UC3. + +@note Function textual ID is "org.opencv.imgproc.colorconvert.bgr2luv" + +@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3. +@sa RGB2Lab, RGB2LUV +*/ +GAPI_EXPORTS GMat BGR2LUV(const GMat& src); + +/** @brief Converts an image from LUV color space to BGR color space. + +The function converts an input image from LUV color space to BGR. +The conventional ranges for B, G, and R channel values are 0 to 255. + +Output image must be 8-bit unsigned 3-channel image @ref CV_8UC3. + +@note Function textual ID is "org.opencv.imgproc.colorconvert.luv2bgr" + +@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3. +@sa BGR2LUV +*/ +GAPI_EXPORTS GMat LUV2BGR(const GMat& src); + +/** @brief Converts an image from YUV color space to BGR color space. + +The function converts an input image from YUV color space to BGR. +The conventional ranges for B, G, and R channel values are 0 to 255. + +Output image must be 8-bit unsigned 3-channel image @ref CV_8UC3. + +@note Function textual ID is "org.opencv.imgproc.colorconvert.yuv2bgr" + +@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3. +@sa BGR2YUV +*/ +GAPI_EXPORTS GMat YUV2BGR(const GMat& src); + +/** @brief Converts an image from BGR color space to YUV color space. + +The function converts an input image from BGR color space to YUV. +The conventional ranges for B, G, and R channel values are 0 to 255. + +Output image must be 8-bit unsigned 3-channel image @ref CV_8UC3. + +@note Function textual ID is "org.opencv.imgproc.colorconvert.bgr2yuv" + +@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3. +@sa YUV2BGR +*/ +GAPI_EXPORTS GMat BGR2YUV(const GMat& src); + +/** @brief Converts an image from RGB color space to Lab color space. + +The function converts an input image from BGR color space to Lab. +The conventional ranges for R, G, and B channel values are 0 to 255. + +Output image must be 8-bit unsigned 3-channel image @ref CV_8UC1. + +@note Function textual ID is "org.opencv.imgproc.colorconvert.rgb2lab" + +@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC1. +@sa RGB2YUV, RGB2LUV +*/ +GAPI_EXPORTS GMat RGB2Lab(const GMat& src); + +/** @brief Converts an image from YUV color space to RGB. +The function converts an input image from YUV color space to RGB. +The conventional ranges for Y, U, and V channel values are 0 to 255. + +Output image must be 8-bit unsigned 3-channel image @ref CV_8UC3. + +@note Function textual ID is "org.opencv.imgproc.colorconvert.yuv2rgb" + +@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3. + +@sa RGB2Lab, RGB2YUV +*/ +GAPI_EXPORTS GMat YUV2RGB(const GMat& src); + +/** @brief Converts an image from NV12 (YUV420p) color space to RGB. +The function converts an input image from NV12 color space to RGB. +The conventional ranges for Y, U, and V channel values are 0 to 255. + +Output image must be 8-bit unsigned 3-channel image @ref CV_8UC3. + +@note Function textual ID is "org.opencv.imgproc.colorconvert.nv12torgb" + +@param src_y input image: 8-bit unsigned 1-channel image @ref CV_8UC1. +@param src_uv input image: 8-bit unsigned 2-channel image @ref CV_8UC2. + +@sa YUV2RGB, NV12toBGR +*/ +GAPI_EXPORTS GMat NV12toRGB(const GMat& src_y, const GMat& src_uv); + +/** @brief Converts an image from NV12 (YUV420p) color space to gray-scaled. +The function converts an input image from NV12 color space to gray-scaled. +The conventional ranges for Y, U, and V channel values are 0 to 255. + +Output image must be 8-bit unsigned 1-channel image @ref CV_8UC1. + +@note Function textual ID is "org.opencv.imgproc.colorconvert.nv12togray" + +@param src_y input image: 8-bit unsigned 1-channel image @ref CV_8UC1. +@param src_uv input image: 8-bit unsigned 2-channel image @ref CV_8UC2. + +@sa YUV2RGB, NV12toBGR +*/ +GAPI_EXPORTS GMat NV12toGray(const GMat& src_y, const GMat& src_uv); + +/** @brief Converts an image from NV12 (YUV420p) color space to BGR. +The function converts an input image from NV12 color space to RGB. +The conventional ranges for Y, U, and V channel values are 0 to 255. + +Output image must be 8-bit unsigned 3-channel image @ref CV_8UC3. + +@note Function textual ID is "org.opencv.imgproc.colorconvert.nv12tobgr" + +@param src_y input image: 8-bit unsigned 1-channel image @ref CV_8UC1. +@param src_uv input image: 8-bit unsigned 2-channel image @ref CV_8UC2. + +@sa YUV2BGR, NV12toRGB +*/ +GAPI_EXPORTS GMat NV12toBGR(const GMat& src_y, const GMat& src_uv); + +/** @brief Converts an image from BayerGR color space to RGB. +The function converts an input image from BayerGR color space to RGB. +The conventional ranges for G, R, and B channel values are 0 to 255. + +Output image must be 8-bit unsigned 3-channel image @ref CV_8UC3. + +@note Function textual ID is "org.opencv.imgproc.colorconvert.bayergr2rgb" + +@param src_gr input image: 8-bit unsigned 1-channel image @ref CV_8UC1. + +@sa YUV2BGR, NV12toRGB +*/ +GAPI_EXPORTS GMat BayerGR2RGB(const GMat& src_gr); + +/** @brief Converts an image from RGB color space to HSV. +The function converts an input image from RGB color space to HSV. +The conventional ranges for R, G, and B channel values are 0 to 255. + +Output image must be 8-bit unsigned 3-channel image @ref CV_8UC3. + +@note Function textual ID is "org.opencv.imgproc.colorconvert.rgb2hsv" + +@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3. + +@sa YUV2BGR, NV12toRGB +*/ +GAPI_EXPORTS GMat RGB2HSV(const GMat& src); + +/** @brief Converts an image from RGB color space to YUV422. +The function converts an input image from RGB color space to YUV422. +The conventional ranges for R, G, and B channel values are 0 to 255. + +Output image must be 8-bit unsigned 2-channel image @ref CV_8UC2. + +@note Function textual ID is "org.opencv.imgproc.colorconvert.rgb2yuv422" + +@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3. + +@sa YUV2BGR, NV12toRGB +*/ +GAPI_EXPORTS GMat RGB2YUV422(const GMat& src); + +/** @brief Converts an image from NV12 (YUV420p) color space to RGB. +The function converts an input image from NV12 color space to RGB. +The conventional ranges for Y, U, and V channel values are 0 to 255. + +Output image must be 8-bit unsigned planar 3-channel image @ref CV_8UC1. +Planar image memory layout is three planes laying in the memory contiguously, +so the image height should be plane_height*plane_number, +image type is @ref CV_8UC1. + +@note Function textual ID is "org.opencv.imgproc.colorconvert.nv12torgbp" + +@param src_y input image: 8-bit unsigned 1-channel image @ref CV_8UC1. +@param src_uv input image: 8-bit unsigned 2-channel image @ref CV_8UC2. + +@sa YUV2RGB, NV12toBGRp, NV12toRGB +*/ +GAPI_EXPORTS GMatP NV12toRGBp(const GMat &src_y, const GMat &src_uv); + +/** @brief Converts an image from NV12 (YUV420p) color space to BGR. +The function converts an input image from NV12 color space to BGR. +The conventional ranges for Y, U, and V channel values are 0 to 255. + +Output image must be 8-bit unsigned planar 3-channel image @ref CV_8UC1. +Planar image memory layout is three planes laying in the memory contiguously, +so the image height should be plane_height*plane_number, +image type is @ref CV_8UC1. + +@note Function textual ID is "org.opencv.imgproc.colorconvert.nv12torgbp" + +@param src_y input image: 8-bit unsigned 1-channel image @ref CV_8UC1. +@param src_uv input image: 8-bit unsigned 2-channel image @ref CV_8UC2. + +@sa YUV2RGB, NV12toRGBp, NV12toBGR +*/ +GAPI_EXPORTS GMatP NV12toBGRp(const GMat &src_y, const GMat &src_uv); + +//! @} gapi_colorconvert +} //namespace gapi +} //namespace cv + +#endif // OPENCV_GAPI_IMGPROC_HPP diff --git a/IPL/include/opencv/opencv2/gapi/infer.hpp b/IPL/include/opencv/opencv2/gapi/infer.hpp new file mode 100644 index 0000000..5a4caff --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/infer.hpp @@ -0,0 +1,219 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2019 Intel Corporation + + +#ifndef OPENCV_GAPI_INFER_HPP +#define OPENCV_GAPI_INFER_HPP + +// FIXME: Inference API is currently only available in full mode +#if !defined(GAPI_STANDALONE) + +#include +#include // string +#include // tuple + +#include // any<> +#include // GKernelType[M], GBackend +#include // GArg +#include // CompileArgTag +#include // GMetaArg + +namespace cv { + +template class GNetworkType; + +// TODO: maybe tuple_wrap_helper from util.hpp may help with this. +// Multiple-return-value network definition (specialized base class) +template +class GNetworkType(Args...)> > +{ +public: + using InArgs = std::tuple; + using OutArgs = std::tuple; + + using Result = OutArgs; + using API = std::function; + + using ResultL = std::tuple< cv::GArray... >; + using APIList = std::function, Args...)>; +}; + +// Single-return-value network definition (specialized base class) +template +class GNetworkType > +{ +public: + using InArgs = std::tuple; + using OutArgs = std::tuple; + + using Result = R; + using API = std::function; + + using ResultL = cv::GArray; + using APIList = std::function, Args...)>; +}; + +// Base "Infer" kernel. Note - for whatever network, kernel ID +// is always the same. Different inference calls are distinguished by +// network _tag_ (an extra field in GCall) +// +// getOutMeta is a stub callback collected by G-API kernel subsystem +// automatically. This is a rare case when this callback is defined by +// a particular backend, not by a network itself. +struct GInferBase { + static constexpr const char * id() { + return "org.opencv.dnn.infer"; // Universal stub + } + static GMetaArgs getOutMeta(const GMetaArgs &, const GArgs &) { + return GMetaArgs{}; // One more universal stub + } +}; + + +// Base "Infer list" kernel. +// All notes from "Infer" kernel apply here as well. +struct GInferListBase { + static constexpr const char * id() { + return "org.opencv.dnn.infer-roi"; // Universal stub + } + static GMetaArgs getOutMeta(const GMetaArgs &, const GArgs &) { + return GMetaArgs{}; // One more universal stub + } +}; + +// A generic inference kernel. API (::on()) is fully defined by the Net +// template parameter. +// Acts as a regular kernel in graph (via KernelTypeMedium). +template +struct GInfer final + : public GInferBase + , public detail::KernelTypeMedium< GInfer + , typename Net::API > { + using GInferBase::getOutMeta; // FIXME: name lookup conflict workaround? + + static constexpr const char* tag() { return Net::tag(); } +}; + +// A generic roi-list inference kernel. API (::on()) is derived from +// the Net template parameter (see more in infer<> overload). +template +struct GInferList final + : public GInferListBase + , public detail::KernelTypeMedium< GInferList + , typename Net::APIList > { + using GInferListBase::getOutMeta; // FIXME: name lookup conflict workaround? + + static constexpr const char* tag() { return Net::tag(); } +}; + +} // namespace cv + +// FIXME: Probably the signature makes a function/tuple/function round-trip +#define G_API_NET(Class, API, Tag) \ + struct Class final: public cv::GNetworkType { \ + static constexpr const char * tag() { return Tag; } \ + } + +namespace cv { +namespace gapi { + + +/** @brief Calculates responses for the specified network (template + * parameter) for every region in the source image. + * + * @tparam A network type defined with G_API_NET() macro. + * @param roi a list of rectangles describing regions of interest + * in the source image. Usually an output of object detector or tracker. + * @param args network's input parameters as specified in G_API_NET() macro. + * NOTE: verified to work reliably with 1-input topologies only. + * @return a list of objects of return type as defined in G_API_NET(). + * If a network has multiple return values (defined with a tuple), a tuple of + * GArray<> objects is returned with the appropriate types inside. + * @sa G_API_NET() + */ +template +typename Net::ResultL infer(cv::GArray roi, Args&&... args) { + return GInferList::on(roi, std::forward(args)...); +} + +/** + * @brief Calculates response for the specified network (template + * parameter) given the input data. + * + * @tparam A network type defined with G_API_NET() macro. + * @param args network's input parameters as specified in G_API_NET() macro. + * @return an object of return type as defined in G_API_NET(). + * If a network has multiple return values (defined with a tuple), a tuple of + * objects of appropriate type is returned. + * @sa G_API_NET() + */ +template +typename Net::Result infer(Args&&... args) { + return GInfer::on(std::forward(args)...); +} + + +} // namespace gapi +} // namespace cv + +#endif // GAPI_STANDALONE + +namespace cv { +namespace gapi { + +// Note: the below code _is_ part of STANDALONE build, +// just to make our compiler code compileable. + +// A type-erased form of network parameters. +// Similar to how a type-erased GKernel is represented and used. +struct GAPI_EXPORTS GNetParam { + std::string tag; // FIXME: const? + GBackend backend; // Specifies the execution model + util::any params; // Backend-interpreted parameter structure +}; + +/** \addtogroup gapi_compile_args + * @{ + */ +/** + * @brief A container class for network configurations. Similar to + * GKernelPackage.Use cv::gapi::networks() to construct this object. + * + * @sa cv::gapi::networks + */ +struct GAPI_EXPORTS GNetPackage { + GNetPackage() : GNetPackage({}) {} + explicit GNetPackage(std::initializer_list &&ii); + std::vector backends() const; + std::vector networks; +}; +/** @} gapi_compile_args */ +} // namespace gapi + +namespace detail { +template +gapi::GNetParam strip(T&& t) { + return gapi::GNetParam { t.tag() + , t.backend() + , t.params() + }; +} + +template<> struct CompileArgTag { + static const char* tag() { return "gapi.net_package"; } +}; + +} // namespace cv::detail + +namespace gapi { +template +cv::gapi::GNetPackage networks(Args&&... args) { + return cv::gapi::GNetPackage({ cv::detail::strip(args)... }); +} +} // namespace gapi +} // namespace cv + +#endif // OPENCV_GAPI_INFER_HPP diff --git a/IPL/include/opencv/opencv2/gapi/infer/ie.hpp b/IPL/include/opencv/opencv2/gapi/infer/ie.hpp new file mode 100644 index 0000000..6e8c2c3 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/infer/ie.hpp @@ -0,0 +1,123 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2019 Intel Corporation + +#ifndef OPENCV_GAPI_INFER_IE_HPP +#define OPENCV_GAPI_INFER_IE_HPP + +#include +#include +#include +#include // tuple, tuple_size + +#include +#include + +#include // GAPI_EXPORTS +#include // GKernelPackage + +namespace cv { +namespace gapi { +// FIXME: introduce a new sub-namespace for NN? +namespace ie { + +GAPI_EXPORTS cv::gapi::GBackend backend(); + +/** + * Specify how G-API and IE should trait input data + * + * In OpenCV, the same cv::Mat is used to represent both + * image and tensor data. Sometimes those are hardly distinguishable, + * so this extra parameter is used to give G-API a hint. + * + * This hint controls how G-API reinterprets the data when converting + * it to IE Blob format (and which layout/etc is assigned to this data). + */ +enum class TraitAs: int +{ + TENSOR, //!< G-API traits an associated cv::Mat as a raw tensor and passes dimensions as-is + IMAGE //!< G-API traits an associated cv::Mat as an image so creates an "image" blob (NCHW/NHWC, etc) +}; + +namespace detail { + struct ParamDesc { + std::string model_path; + std::string weights_path; + std::string device_id; + + // NB: Here order follows the `Net` API + std::vector input_names; + std::vector output_names; + + using ConstInput = std::pair; + std::unordered_map const_inputs; + + // NB: nun_* may differ from topology's real input/output port numbers + // (e.g. topology's partial execution) + std::size_t num_in; // How many inputs are defined in the operation + std::size_t num_out; // How many outputs are defined in the operation + }; +} // namespace detail + +// FIXME: this is probably a shared (reusable) thing +template +struct PortCfg { + using In = std::array + < std::string + , std::tuple_size::value >; + using Out = std::array + < std::string + , std::tuple_size::value >; +}; + +template class Params { +public: + Params(const std::string &model, + const std::string &weights, + const std::string &device) + : desc{ model, weights, device, {}, {}, {} + , std::tuple_size::value + , std::tuple_size::value + } { + }; + + Params& cfgInputLayers(const typename PortCfg::In &ll) { + desc.input_names.clear(); + desc.input_names.reserve(ll.size()); + std::copy(ll.begin(), ll.end(), + std::back_inserter(desc.input_names)); + return *this; + } + + Params& cfgOutputLayers(const typename PortCfg::Out &ll) { + desc.output_names.clear(); + desc.output_names.reserve(ll.size()); + std::copy(ll.begin(), ll.end(), + std::back_inserter(desc.output_names)); + return *this; + } + + Params& constInput(const std::string &layer_name, + const cv::Mat &data, + TraitAs hint = TraitAs::TENSOR) { + desc.const_inputs[layer_name] = {data, hint}; + return *this; + } + + // BEGIN(G-API's network parametrization API) + GBackend backend() const { return cv::gapi::ie::backend(); } + std::string tag() const { return Net::tag(); } + cv::util::any params() const { return { desc }; } + // END(G-API's network parametrization API) + +protected: + detail::ParamDesc desc; +}; + +} // namespace ie +} // namespace gapi +} // namespace cv + +#endif // OPENCV_GAPI_INFER_HPP diff --git a/IPL/include/opencv/opencv2/gapi/ocl/core.hpp b/IPL/include/opencv/opencv2/gapi/ocl/core.hpp new file mode 100644 index 0000000..4ab85e2 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/ocl/core.hpp @@ -0,0 +1,27 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_OCL_CORE_API_HPP +#define OPENCV_GAPI_OCL_CORE_API_HPP + +#include // GAPI_EXPORTS +#include // GKernelPackage + +namespace cv { +namespace gapi { +namespace core { +namespace ocl { + + GAPI_EXPORTS GKernelPackage kernels(); + +} // namespace ocl +} // namespace core +} // namespace gapi +} // namespace cv + + +#endif // OPENCV_GAPI_OCL_CORE_API_HPP diff --git a/IPL/include/opencv/opencv2/gapi/ocl/goclkernel.hpp b/IPL/include/opencv/opencv2/gapi/ocl/goclkernel.hpp new file mode 100644 index 0000000..ee363c0 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/ocl/goclkernel.hpp @@ -0,0 +1,246 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018-2019 Intel Corporation + + +#ifndef OPENCV_GAPI_GOCLKERNEL_HPP +#define OPENCV_GAPI_GOCLKERNEL_HPP + +#include +#include +#include +#include + +#include +#include +#include +#include + +// FIXME: namespace scheme for backends? +namespace cv { + +namespace gimpl +{ + // Forward-declare an internal class + class GOCLExecutable; +} // namespace gimpl + +namespace gapi +{ +namespace ocl +{ + /** + * \addtogroup gapi_std_backends G-API Standard Backends + * @{ + */ + /** + * @brief Get a reference to OCL backend. + * + * At the moment, the OCL backend is built atop of OpenCV + * "Transparent API" (T-API), see cv::UMat for details. + * + * @sa gapi_std_backends + */ + GAPI_EXPORTS cv::gapi::GBackend backend(); + /** @} */ +} // namespace ocl +} // namespace gapi + + +// Represents arguments which are passed to a wrapped OCL function +// FIXME: put into detail? +class GAPI_EXPORTS GOCLContext +{ +public: + // Generic accessor API + template + const T& inArg(int input) { return m_args.at(input).get(); } + + // Syntax sugar + const cv::UMat& inMat(int input); + cv::UMat& outMatR(int output); // FIXME: Avoid cv::Mat m = ctx.outMatR() + + const cv::Scalar& inVal(int input); + cv::Scalar& outValR(int output); // FIXME: Avoid cv::Scalar s = ctx.outValR() + template std::vector& outVecR(int output) // FIXME: the same issue + { + return outVecRef(output).wref(); + } + template T& outOpaqueR(int output) // FIXME: the same issue + { + return outOpaqueRef(output).wref(); + } + +protected: + detail::VectorRef& outVecRef(int output); + detail::VectorRef& outOpaqueRef(int output); + + std::vector m_args; + std::unordered_map m_results; + + + friend class gimpl::GOCLExecutable; +}; + +class GAPI_EXPORTS GOCLKernel +{ +public: + // This function is kernel's execution entry point (does the processing work) + using F = std::function; + + GOCLKernel(); + explicit GOCLKernel(const F& f); + + void apply(GOCLContext &ctx); + +protected: + F m_f; +}; + +// FIXME: This is an ugly ad-hoc implementation. TODO: refactor + +namespace detail +{ +template struct ocl_get_in; +template<> struct ocl_get_in +{ + static cv::UMat get(GOCLContext &ctx, int idx) { return ctx.inMat(idx); } +}; +template<> struct ocl_get_in +{ + static cv::Scalar get(GOCLContext &ctx, int idx) { return ctx.inVal(idx); } +}; +template struct ocl_get_in > +{ + static const std::vector& get(GOCLContext &ctx, int idx) { return ctx.inArg(idx).rref(); } +}; +template struct ocl_get_in > +{ + static const U& get(GOCLContext &ctx, int idx) { return ctx.inArg(idx).rref(); } +}; +template struct ocl_get_in +{ + static T get(GOCLContext &ctx, int idx) { return ctx.inArg(idx); } +}; + +struct tracked_cv_umat{ + //TODO Think if T - API could reallocate UMat to a proper size - how do we handle this ? + //tracked_cv_umat(cv::UMat& m) : r{(m)}, original_data{m.getMat(ACCESS_RW).data} {} + tracked_cv_umat(cv::UMat& m) : r(m), original_data{ nullptr } {} + cv::UMat &r; // FIXME: It was a value (not a reference) before. + // Actually OCL backend should allocate its internal data! + uchar* original_data; + + operator cv::UMat& (){ return r;} + void validate() const{ + //if (r.getMat(ACCESS_RW).data != original_data) + //{ + // util::throw_error + // (std::logic_error + // ("OpenCV kernel output parameter was reallocated. \n" + // "Incorrect meta data was provided ?")); + //} + + } +}; + +template +void postprocess_ocl(Outputs&... outs) +{ + struct + { + void operator()(tracked_cv_umat* bm) { bm->validate(); } + void operator()(...) { } + + } validate; + //dummy array to unfold parameter pack + int dummy[] = { 0, (validate(&outs), 0)... }; + cv::util::suppress_unused_warning(dummy); +} + +template struct ocl_get_out; +template<> struct ocl_get_out +{ + static tracked_cv_umat get(GOCLContext &ctx, int idx) + { + auto& r = ctx.outMatR(idx); + return{ r }; + } +}; +template<> struct ocl_get_out +{ + static cv::Scalar& get(GOCLContext &ctx, int idx) + { + return ctx.outValR(idx); + } +}; +template struct ocl_get_out > +{ + static std::vector& get(GOCLContext &ctx, int idx) { return ctx.outVecR(idx); } +}; +template struct ocl_get_out > +{ + static U& get(GOCLContext &ctx, int idx) { return ctx.outOpaqueR(idx); } +}; + +template +struct OCLCallHelper; + +// FIXME: probably can be simplified with std::apply or analogue. +template +struct OCLCallHelper, std::tuple > +{ + template + struct call_and_postprocess + { + template + static void call(Inputs&&... ins, Outputs&&... outs) + { + //not using a std::forward on outs is deliberate in order to + //cause compilation error, by trying to bind rvalue references to lvalue references + Impl::run(std::forward(ins)..., outs...); + + postprocess_ocl(outs...); + } + }; + + template + static void call_impl(GOCLContext &ctx, detail::Seq, detail::Seq) + { + //TODO: Make sure that OpenCV kernels do not reallocate memory for output parameters + //by comparing it's state (data ptr) before and after the call. + //Convert own::Scalar to cv::Scalar before call kernel and run kernel + //convert cv::Scalar to own::Scalar after call kernel and write back results + call_and_postprocess::get(ctx, IIs))...>::call(ocl_get_in::get(ctx, IIs)..., ocl_get_out::get(ctx, OIs)...); + } + + static void call(GOCLContext &ctx) + { + call_impl(ctx, + typename detail::MkSeq::type(), + typename detail::MkSeq::type()); + } +}; + +} // namespace detail + +template +class GOCLKernelImpl: public cv::detail::OCLCallHelper, + public cv::detail::KernelTag +{ + using P = detail::OCLCallHelper; + +public: + using API = K; + + static cv::gapi::GBackend backend() { return cv::gapi::ocl::backend(); } + static cv::GOCLKernel kernel() { return GOCLKernel(&P::call); } +}; + +#define GAPI_OCL_KERNEL(Name, API) struct Name: public cv::GOCLKernelImpl + +} // namespace cv + +#endif // OPENCV_GAPI_GOCLKERNEL_HPP diff --git a/IPL/include/opencv/opencv2/gapi/ocl/imgproc.hpp b/IPL/include/opencv/opencv2/gapi/ocl/imgproc.hpp new file mode 100644 index 0000000..1bb5911 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/ocl/imgproc.hpp @@ -0,0 +1,27 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_OCL_IMGPROC_API_HPP +#define OPENCV_GAPI_OCL_IMGPROC_API_HPP + +#include // GAPI_EXPORTS +#include // GKernelPackage + +namespace cv { +namespace gapi { +namespace imgproc { +namespace ocl { + + GAPI_EXPORTS GKernelPackage kernels(); + +} // namespace ocl +} // namespace imgproc +} // namespace gapi +} // namespace cv + + +#endif // OPENCV_GAPI_OCL_IMGPROC_API_HPP diff --git a/IPL/include/opencv/opencv2/gapi/opencv_includes.hpp b/IPL/include/opencv/opencv2/gapi/opencv_includes.hpp new file mode 100644 index 0000000..5acf280 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/opencv_includes.hpp @@ -0,0 +1,21 @@ +// This file is part of OpenCV project. + +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_OPENCV_INCLUDES_HPP +#define OPENCV_GAPI_OPENCV_INCLUDES_HPP + +#if !defined(GAPI_STANDALONE) +# include +# include +# include +# include +#else // Without OpenCV +# include +#endif // !defined(GAPI_STANDALONE) + +#endif // OPENCV_GAPI_OPENCV_INCLUDES_HPP diff --git a/IPL/include/opencv/opencv2/gapi/operators.hpp b/IPL/include/opencv/opencv2/gapi/operators.hpp new file mode 100644 index 0000000..b20062c --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/operators.hpp @@ -0,0 +1,69 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_OPERATORS_HPP +#define OPENCV_GAPI_OPERATORS_HPP + +#include +#include + +GAPI_EXPORTS cv::GMat operator+(const cv::GMat& lhs, const cv::GMat& rhs); + +GAPI_EXPORTS cv::GMat operator+(const cv::GMat& lhs, const cv::GScalar& rhs); +GAPI_EXPORTS cv::GMat operator+(const cv::GScalar& lhs, const cv::GMat& rhs); + +GAPI_EXPORTS cv::GMat operator-(const cv::GMat& lhs, const cv::GMat& rhs); + +GAPI_EXPORTS cv::GMat operator-(const cv::GMat& lhs, const cv::GScalar& rhs); +GAPI_EXPORTS cv::GMat operator-(const cv::GScalar& lhs, const cv::GMat& rhs); + +GAPI_EXPORTS cv::GMat operator*(const cv::GMat& lhs, float rhs); +GAPI_EXPORTS cv::GMat operator*(float lhs, const cv::GMat& rhs); +GAPI_EXPORTS cv::GMat operator*(const cv::GMat& lhs, const cv::GScalar& rhs); +GAPI_EXPORTS cv::GMat operator*(const cv::GScalar& lhs, const cv::GMat& rhs); + +GAPI_EXPORTS cv::GMat operator/(const cv::GMat& lhs, const cv::GScalar& rhs); +GAPI_EXPORTS cv::GMat operator/(const cv::GScalar& lhs, const cv::GMat& rhs); +GAPI_EXPORTS cv::GMat operator/(const cv::GMat& lhs, const cv::GMat& rhs); + +GAPI_EXPORTS cv::GMat operator&(const cv::GMat& lhs, const cv::GMat& rhs); +GAPI_EXPORTS cv::GMat operator|(const cv::GMat& lhs, const cv::GMat& rhs); +GAPI_EXPORTS cv::GMat operator^(const cv::GMat& lhs, const cv::GMat& rhs); +GAPI_EXPORTS cv::GMat operator~(const cv::GMat& lhs); + +GAPI_EXPORTS cv::GMat operator&(const cv::GScalar& lhs, const cv::GMat& rhs); +GAPI_EXPORTS cv::GMat operator|(const cv::GScalar& lhs, const cv::GMat& rhs); +GAPI_EXPORTS cv::GMat operator^(const cv::GScalar& lhs, const cv::GMat& rhs); + +GAPI_EXPORTS cv::GMat operator&(const cv::GMat& lhs, const cv::GScalar& rhs); +GAPI_EXPORTS cv::GMat operator|(const cv::GMat& lhs, const cv::GScalar& rhs); +GAPI_EXPORTS cv::GMat operator^(const cv::GMat& lhs, const cv::GScalar& rhs); + +GAPI_EXPORTS cv::GMat operator>(const cv::GMat& lhs, const cv::GMat& rhs); +GAPI_EXPORTS cv::GMat operator>=(const cv::GMat& lhs, const cv::GMat& rhs); +GAPI_EXPORTS cv::GMat operator<(const cv::GMat& lhs, const cv::GMat& rhs); +GAPI_EXPORTS cv::GMat operator<=(const cv::GMat& lhs, const cv::GMat& rhs); +GAPI_EXPORTS cv::GMat operator==(const cv::GMat& lhs, const cv::GMat& rhs); +GAPI_EXPORTS cv::GMat operator!=(const cv::GMat& lhs, const cv::GMat& rhs); + +GAPI_EXPORTS cv::GMat operator>(const cv::GMat& lhs, const cv::GScalar& rhs); +GAPI_EXPORTS cv::GMat operator>=(const cv::GMat& lhs, const cv::GScalar& rhs); +GAPI_EXPORTS cv::GMat operator<(const cv::GMat& lhs, const cv::GScalar& rhs); +GAPI_EXPORTS cv::GMat operator<=(const cv::GMat& lhs, const cv::GScalar& rhs); +GAPI_EXPORTS cv::GMat operator==(const cv::GMat& lhs, const cv::GScalar& rhs); +GAPI_EXPORTS cv::GMat operator!=(const cv::GMat& lhs, const cv::GScalar& rhs); + +GAPI_EXPORTS cv::GMat operator>(const cv::GScalar& lhs, const cv::GMat& rhs); +GAPI_EXPORTS cv::GMat operator>=(const cv::GScalar& lhs, const cv::GMat& rhs); +GAPI_EXPORTS cv::GMat operator<(const cv::GScalar& lhs, const cv::GMat& rhs); +GAPI_EXPORTS cv::GMat operator<=(const cv::GScalar& lhs, const cv::GMat& rhs); +GAPI_EXPORTS cv::GMat operator==(const cv::GScalar& lhs, const cv::GMat& rhs); +GAPI_EXPORTS cv::GMat operator!=(const cv::GScalar& lhs, const cv::GMat& rhs); + + + +#endif // OPENCV_GAPI_OPERATORS_HPP diff --git a/IPL/include/opencv/opencv2/gapi/own/assert.hpp b/IPL/include/opencv/opencv2/gapi/own/assert.hpp new file mode 100644 index 0000000..d0e0f1c --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/own/assert.hpp @@ -0,0 +1,43 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_OWN_ASSERT_HPP +#define OPENCV_GAPI_OWN_ASSERT_HPP + +#if !defined(GAPI_STANDALONE) +#include +#define GAPI_Assert CV_Assert +#define GAPI_DbgAssert CV_DbgAssert + +#else +#include +#include +#include + +namespace detail +{ + [[noreturn]] inline void assert_abort(const char* str, int line, const char* file, const char* func) + { + std::stringstream ss; + ss << file << ":" << line << ": Assertion " << str << " in function " << func << " failed\n"; + cv::util::throw_error(std::logic_error(ss.str())); + } +} + +#define GAPI_Assert(expr) \ +{ if (!(expr)) ::detail::assert_abort(#expr, __LINE__, __FILE__, __func__); } + + +#ifdef NDEBUG +# define GAPI_DbgAssert(expr) +#else +# define GAPI_DbgAssert(expr) GAPI_Assert(expr) +#endif + +#endif // GAPI_STANDALONE + +#endif // OPENCV_GAPI_OWN_ASSERT_HPP diff --git a/IPL/include/opencv/opencv2/gapi/own/convert.hpp b/IPL/include/opencv/opencv2/gapi/own/convert.hpp new file mode 100644 index 0000000..58f291c --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/own/convert.hpp @@ -0,0 +1,51 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_OWN_CONVERT_HPP +#define OPENCV_GAPI_OWN_CONVERT_HPP + +#if !defined(GAPI_STANDALONE) + +#include +#include + +namespace cv +{ + template + std::vector to_own(const cv::MatSize &sz) { + std::vector result(sz.dims()); + for (int i = 0; i < sz.dims(); i++) { + // Note: cv::MatSize is not iterable + result[i] = static_cast(sz[i]); + } + return result; + } + + cv::gapi::own::Mat to_own(Mat&&) = delete; + + inline cv::gapi::own::Mat to_own(Mat const& m) { + return (m.dims == 2) + ? cv::gapi::own::Mat{m.rows, m.cols, m.type(), m.data, m.step} + : cv::gapi::own::Mat{to_own(m.size), m.type(), m.data}; + }; +namespace gapi +{ +namespace own +{ + inline cv::Mat to_ocv(Mat const& m) { + return m.dims.empty() + ? cv::Mat{m.rows, m.cols, m.type(), m.data, m.step} + : cv::Mat{m.dims, m.type(), m.data}; + } + cv::Mat to_ocv(Mat&&) = delete; +} // namespace own +} // namespace gapi +} // namespace cv + +#endif // !defined(GAPI_STANDALONE) + +#endif // OPENCV_GAPI_OWN_CONVERT_HPP diff --git a/IPL/include/opencv/opencv2/gapi/own/cvdefs.hpp b/IPL/include/opencv/opencv2/gapi/own/cvdefs.hpp new file mode 100644 index 0000000..354609b --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/own/cvdefs.hpp @@ -0,0 +1,157 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_CV_DEFS_HPP +#define OPENCV_GAPI_CV_DEFS_HPP + +#if defined(GAPI_STANDALONE) +#include // cv::gapi::own::Rect/Size/Point +#include // cv::gapi::own::Scalar + +// Simulate OpenCV definitions taken from various +// OpenCV interface headers if G-API is built in a +// standalone mode. + +// interface.h: + +typedef unsigned char uchar; +typedef char schar; + +typedef unsigned short ushort; + +#define CV_CN_MAX 512 +#define CV_CN_SHIFT 3 +#define CV_DEPTH_MAX (1 << CV_CN_SHIFT) + + +#define CV_8U 0 +#define CV_8S 1 +#define CV_16U 2 +#define CV_16S 3 +#define CV_32S 4 +#define CV_32F 5 +#define CV_64F 6 +#define CV_USRTYPE1 7 + +#define CV_MAT_DEPTH_MASK (CV_DEPTH_MAX - 1) +#define CV_MAT_DEPTH(flags) ((flags) & CV_MAT_DEPTH_MASK) + +#define CV_MAKETYPE(depth,cn) (CV_MAT_DEPTH(depth) + (((cn)-1) << CV_CN_SHIFT)) +#define CV_MAKE_TYPE CV_MAKETYPE + +#define CV_8UC1 CV_MAKETYPE(CV_8U,1) +#define CV_8UC2 CV_MAKETYPE(CV_8U,2) +#define CV_8UC3 CV_MAKETYPE(CV_8U,3) +#define CV_8UC4 CV_MAKETYPE(CV_8U,4) +#define CV_8UC(n) CV_MAKETYPE(CV_8U,(n)) + +#define CV_8SC1 CV_MAKETYPE(CV_8S,1) +#define CV_8SC2 CV_MAKETYPE(CV_8S,2) +#define CV_8SC3 CV_MAKETYPE(CV_8S,3) +#define CV_8SC4 CV_MAKETYPE(CV_8S,4) +#define CV_8SC(n) CV_MAKETYPE(CV_8S,(n)) + +#define CV_16UC1 CV_MAKETYPE(CV_16U,1) +#define CV_16UC2 CV_MAKETYPE(CV_16U,2) +#define CV_16UC3 CV_MAKETYPE(CV_16U,3) +#define CV_16UC4 CV_MAKETYPE(CV_16U,4) +#define CV_16UC(n) CV_MAKETYPE(CV_16U,(n)) + +#define CV_16SC1 CV_MAKETYPE(CV_16S,1) +#define CV_16SC2 CV_MAKETYPE(CV_16S,2) +#define CV_16SC3 CV_MAKETYPE(CV_16S,3) +#define CV_16SC4 CV_MAKETYPE(CV_16S,4) +#define CV_16SC(n) CV_MAKETYPE(CV_16S,(n)) + +#define CV_32SC1 CV_MAKETYPE(CV_32S,1) +#define CV_32SC2 CV_MAKETYPE(CV_32S,2) +#define CV_32SC3 CV_MAKETYPE(CV_32S,3) +#define CV_32SC4 CV_MAKETYPE(CV_32S,4) +#define CV_32SC(n) CV_MAKETYPE(CV_32S,(n)) + +#define CV_32FC1 CV_MAKETYPE(CV_32F,1) +#define CV_32FC2 CV_MAKETYPE(CV_32F,2) +#define CV_32FC3 CV_MAKETYPE(CV_32F,3) +#define CV_32FC4 CV_MAKETYPE(CV_32F,4) +#define CV_32FC(n) CV_MAKETYPE(CV_32F,(n)) + +#define CV_64FC1 CV_MAKETYPE(CV_64F,1) +#define CV_64FC2 CV_MAKETYPE(CV_64F,2) +#define CV_64FC3 CV_MAKETYPE(CV_64F,3) +#define CV_64FC4 CV_MAKETYPE(CV_64F,4) +#define CV_64FC(n) CV_MAKETYPE(CV_64F,(n)) + +// cvdef.h: + +#define CV_MAT_CN_MASK ((CV_CN_MAX - 1) << CV_CN_SHIFT) +#define CV_MAT_CN(flags) ((((flags) & CV_MAT_CN_MASK) >> CV_CN_SHIFT) + 1) +#define CV_MAT_TYPE_MASK (CV_DEPTH_MAX*CV_CN_MAX - 1) +#define CV_MAT_TYPE(flags) ((flags) & CV_MAT_TYPE_MASK) +#define CV_MAT_CONT_FLAG_SHIFT 14 +#define CV_MAT_CONT_FLAG (1 << CV_MAT_CONT_FLAG_SHIFT) +#define CV_IS_MAT_CONT(flags) ((flags) & CV_MAT_CONT_FLAG) +#define CV_IS_CONT_MAT CV_IS_MAT_CONT +#define CV_SUBMAT_FLAG_SHIFT 15 +#define CV_SUBMAT_FLAG (1 << CV_SUBMAT_FLAG_SHIFT) +#define CV_IS_SUBMAT(flags) ((flags) & CV_MAT_SUBMAT_FLAG) + +///** Size of each channel item, +// 0x8442211 = 1000 0100 0100 0010 0010 0001 0001 ~ array of sizeof(arr_type_elem) */ +//#define CV_ELEM_SIZE1(type) \ +// ((((sizeof(size_t)<<28)|0x8442211) >> CV_MAT_DEPTH(type)*4) & 15) + +#define CV_MAT_TYPE(flags) ((flags) & CV_MAT_TYPE_MASK) + +/** 0x3a50 = 11 10 10 01 01 00 00 ~ array of log2(sizeof(arr_type_elem)) */ +#define CV_ELEM_SIZE(type) \ + (CV_MAT_CN(type) << ((((sizeof(size_t)/4+1)*16384|0x3a50) >> CV_MAT_DEPTH(type)*2) & 3)) + +#ifndef CV_OVERRIDE +# define CV_OVERRIDE override +#endif + +// base.h: +namespace cv +{ +enum BorderTypes { + BORDER_CONSTANT = 0, //!< `iiiiii|abcdefgh|iiiiiii` with some specified `i` + BORDER_REPLICATE = 1, //!< `aaaaaa|abcdefgh|hhhhhhh` + BORDER_REFLECT = 2, //!< `fedcba|abcdefgh|hgfedcb` + BORDER_WRAP = 3, //!< `cdefgh|abcdefgh|abcdefg` + BORDER_REFLECT_101 = 4, //!< `gfedcb|abcdefgh|gfedcba` + BORDER_TRANSPARENT = 5, //!< `uvwxyz|abcdefgh|ijklmno` + + BORDER_REFLECT101 = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101 + BORDER_DEFAULT = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101 + BORDER_ISOLATED = 16 //!< do not look outside of ROI +}; +// imgproc.hpp: +enum InterpolationFlags{ + INTER_NEAREST = 0, + INTER_LINEAR = 1, + INTER_CUBIC = 2, + INTER_AREA = 3, + INTER_LANCZOS4 = 4, + INTER_LINEAR_EXACT = 5, + INTER_MAX = 7, +}; +// replacement of cv's structures: +using Rect = gapi::own::Rect; +using Size = gapi::own::Size; +using Point = gapi::own::Point; +using Scalar = gapi::own::Scalar; +} // namespace cv + +static inline int cvFloor( double value ) +{ + int i = (int)value; + return i - (i > value); +} + +#endif // defined(GAPI_STANDALONE) + +#endif // OPENCV_GAPI_CV_DEFS_HPP diff --git a/IPL/include/opencv/opencv2/gapi/own/exports.hpp b/IPL/include/opencv/opencv2/gapi/own/exports.hpp new file mode 100644 index 0000000..53bff2a --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/own/exports.hpp @@ -0,0 +1,31 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_OWN_TYPES_HPP +#define OPENCV_GAPI_OWN_TYPES_HPP + +# if defined(__OPENCV_BUILD) +# include +# define GAPI_EXPORTS CV_EXPORTS +# else +# define GAPI_EXPORTS + +#if 0 // Note: the following version currently is not needed for non-OpenCV build +# if defined _WIN32 +# define GAPI_EXPORTS __declspec(dllexport) +# elif defined __GNUC__ && __GNUC__ >= 4 +# define GAPI_EXPORTS __attribute__ ((visibility ("default"))) +# endif + +# ifndef GAPI_EXPORTS +# define GAPI_EXPORTS +# endif +#endif + +# endif + +#endif // OPENCV_GAPI_OWN_TYPES_HPP diff --git a/IPL/include/opencv/opencv2/gapi/own/mat.hpp b/IPL/include/opencv/opencv2/gapi/own/mat.hpp new file mode 100644 index 0000000..a5f5b5e --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/own/mat.hpp @@ -0,0 +1,341 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_OWN_MAT_HPP +#define OPENCV_GAPI_OWN_MAT_HPP + +#include +#include +#include +#include +#include + +#include //std::shared_ptr +#include //std::memcpy +#include //std::accumulate +#include + +namespace cv { namespace gapi { namespace own { + namespace detail { + template + void assign_row(void* ptr, int cols, Scalar const& s) + { + auto p = static_cast(ptr); + for (int c = 0; c < cols; c++) + { + for (int ch = 0; ch < channels; ch++) + { + p[c * channels + ch] = saturate(s[ch], roundd); + } + } + } + + inline size_t default_step(int type, int cols) + { + return CV_ELEM_SIZE(type) * cols; + } + //Matrix header, i.e. fields that are unique to each Mat object. + //Devoted class is needed to implement custom behavior on move (erasing state of moved from object) + struct MatHeader{ + enum { AUTO_STEP = 0}; + enum { TYPE_MASK = 0x00000FFF }; + + MatHeader() = default; + + MatHeader(int _rows, int _cols, int type, void* _data, size_t _step) + : flags((type & TYPE_MASK)), rows(_rows), cols(_cols), data((uchar*)_data), step(_step == AUTO_STEP ? detail::default_step(type, _cols) : _step) + {} + + MatHeader(const std::vector &_dims, int type, void* _data) + : flags((type & TYPE_MASK)), data((uchar*)_data), step(0), dims(_dims) + {} + + MatHeader(const MatHeader& ) = default; + MatHeader(MatHeader&& src) : MatHeader(src) // reuse copy constructor here + { + MatHeader empty; //give it a name to call copy(not move) assignment below + src = empty; + } + MatHeader& operator=(const MatHeader& ) = default; + MatHeader& operator=(MatHeader&& src) + { + *this = src; //calling a copy assignment here, not move one + MatHeader empty; //give it a name to call copy(not move) assignment below + src = empty; + return *this; + } + /*! includes several bit-fields: + - depth + - number of channels + */ + int flags = 0; + + //! the number of rows and columns or (-1, -1) when the matrix has more than 2 dimensions + int rows = 0, cols = 0; + //! pointer to the data + uchar* data = nullptr; + size_t step = 0; + //! dimensions (ND-case) + std::vector dims; + }; + } // namespace detail + //concise version of cv::Mat suitable for GAPI needs (used when no dependence on OpenCV is required) + class Mat : public detail::MatHeader{ + public: + + Mat() = default; + + /** @overload + @param _rows Number of rows in a 2D array. + @param _cols Number of columns in a 2D array. + @param _type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + @param _data Pointer to the user data. Matrix constructors that take data and step parameters do not + allocate matrix data. Instead, they just initialize the matrix header that points to the specified + data, which means that no data is copied. This operation is very efficient and can be used to + process external data using OpenCV functions. The external data is not automatically deallocated, so + you should take care of it. + @param _step Number of bytes each matrix row occupies. The value should include the padding bytes at + the end of each row, if any. If the parameter is missing (set to AUTO_STEP ), no padding is assumed + and the actual step is calculated as cols*elemSize(). See Mat::elemSize. + */ + Mat(int _rows, int _cols, int _type, void* _data, size_t _step = AUTO_STEP) + : MatHeader (_rows, _cols, _type, _data, _step) + {} + + Mat(const std::vector &_dims, int _type, void* _data) + : MatHeader (_dims, _type, _data) + {} + + Mat(std::vector &&_dims, int _type, void* _data) + : MatHeader (std::move(_dims), _type, _data) + {} + + Mat(Mat const& src, const Rect& roi ) + : Mat(src) + { + rows = roi.height; + cols = roi.width; + data = ptr(roi.y, roi.x); + } + + Mat(Mat const& src) = default; + Mat(Mat&& src) = default; + + Mat& operator=(Mat const& src) = default; + Mat& operator=(Mat&& src) = default; + + /** @brief Sets all or some of the array elements to the specified value. + @param s Assigned scalar converted to the actual array type. + */ + Mat& operator = (const Scalar& s) + { + constexpr unsigned max_channels = 4; //Scalar can't fit more than 4 + using func_p_t = void (*)(void*, int, Scalar const&); + using detail::assign_row; + #define TABLE_ENTRY(type) {assign_row, assign_row, assign_row, assign_row} + static constexpr func_p_t func_tbl[][max_channels] = { + TABLE_ENTRY(uchar), + TABLE_ENTRY(schar), + TABLE_ENTRY(ushort), + TABLE_ENTRY(short), + TABLE_ENTRY(int), + TABLE_ENTRY(float), + TABLE_ENTRY(double) + }; + #undef TABLE_ENTRY + + static_assert(CV_8U == 0 && CV_8S == 1 && CV_16U == 2 && CV_16S == 3 + && CV_32S == 4 && CV_32F == 5 && CV_64F == 6, + "OCV type ids used as indexes to array, thus exact numbers are important!" + ); + + const auto depth = static_cast(this->depth()); + GAPI_Assert(depth < sizeof(func_tbl)/sizeof(func_tbl[0])); + + if (dims.empty()) + { + const auto channels = static_cast(this->channels()); + GAPI_Assert(channels <= max_channels); + + auto* f = func_tbl[depth][channels - 1]; + for (int r = 0; r < rows; ++r) + { + (*f)(static_cast(ptr(r)), cols, s ); + } + } + else + { + auto* f = func_tbl[depth][0]; + // FIXME: better to refactor assign_row to use std::size_t by default + (*f)(static_cast(data), static_cast(total()), s); + } + return *this; + } + + /** @brief Returns the matrix element size in bytes. + + The method returns the matrix element size in bytes. For example, if the matrix type is CV_16SC3 , + the method returns 3\*sizeof(short) or 6. + */ + size_t elemSize() const + { + return CV_ELEM_SIZE(type()); + } + /** @brief Returns the type of a matrix element. + + The method returns a matrix element type. This is an identifier compatible with the CvMat type + system, like CV_16SC3 or 16-bit signed 3-channel array, and so on. + */ + int type() const {return CV_MAT_TYPE(flags);} + + /** @brief Returns the depth of a matrix element. + + The method returns the identifier of the matrix element depth (the type of each individual channel). + For example, for a 16-bit signed element array, the method returns CV_16S . A complete list of + matrix types contains the following values: + - CV_8U - 8-bit unsigned integers ( 0..255 ) + - CV_8S - 8-bit signed integers ( -128..127 ) + - CV_16U - 16-bit unsigned integers ( 0..65535 ) + - CV_16S - 16-bit signed integers ( -32768..32767 ) + - CV_32S - 32-bit signed integers ( -2147483648..2147483647 ) + - CV_32F - 32-bit floating-point numbers ( -FLT_MAX..FLT_MAX, INF, NAN ) + - CV_64F - 64-bit floating-point numbers ( -DBL_MAX..DBL_MAX, INF, NAN ) + */ + int depth() const {return CV_MAT_DEPTH(flags);} + + /** @brief Returns the number of matrix channels. + + The method returns the number of matrix channels. + If matrix is N-dimensional, -1 is returned. + */ + int channels() const {return dims.empty() ? CV_MAT_CN(flags) : -1;} + + /** + @param _rows New number of rows. + @param _cols New number of columns. + @param _type New matrix type. + */ + void create(int _rows, int _cols, int _type) + { + create(Size{_cols, _rows}, _type); + } + /** @overload + @param _size Alternative new matrix size specification: Size(cols, rows) + @param _type New matrix type. + */ + void create(Size _size, int _type) + { + GAPI_Assert(_size.height >= 0 && _size.width >= 0); + if (_size != Size{cols, rows} ) + { + Mat tmp{_size.height, _size.width, _type, nullptr}; + tmp.memory.reset(new uchar[ tmp.step * tmp.rows], [](uchar * p){delete[] p;}); + tmp.data = tmp.memory.get(); + + *this = std::move(tmp); + } + } + + void create(const std::vector &_dims, int _type) + { + // FIXME: make a proper reallocation-on-demands + // WARNING: no tensor views, so no strides + Mat tmp{_dims, _type, nullptr}; + // FIXME: this accumulate duplicates a lot + const auto sz = std::accumulate(_dims.begin(), _dims.end(), 1, std::multiplies()); + tmp.memory.reset(new uchar[CV_ELEM_SIZE(_type)*sz], [](uchar * p){delete[] p;}); + tmp.data = tmp.memory.get(); + *this = std::move(tmp); + } + + /** @brief Copies the matrix to another one. + + The method copies the matrix data to another matrix. Before copying the data, the method invokes : + @code + m.create(this->size(), this->type()); + @endcode + so that the destination matrix is reallocated if needed. While m.copyTo(m); works flawlessly, the + function does not handle the case of a partial overlap between the source and the destination + matrices. + */ + void copyTo(Mat& dst) const + { + if (dims.empty()) + { + dst.create(rows, cols, type()); + for (int r = 0; r < rows; ++r) + { + std::copy_n(ptr(r), detail::default_step(type(),cols), dst.ptr(r)); + } + } + else + { + dst.create(dims, depth()); + std::copy_n(data, total()*elemSize(), data); + } + } + + /** @brief Returns true if the array has no elements. + + The method returns true if Mat::total() is 0 or if Mat::data is NULL. Because of pop_back() and + resize() methods `M.total() == 0` does not imply that `M.data == NULL`. + */ + bool empty() const + { + return data == 0 || total() == 0; + } + + /** @brief Returns the total number of array elements. + + The method returns the number of array elements (a number of pixels if the array represents an + image). + */ + size_t total() const + { + return dims.empty() + ? (static_cast(rows) * cols) + : std::accumulate(dims.begin(), dims.end(), static_cast(1), std::multiplies()); + } + + /** @overload + @param roi Extracted submatrix specified as a rectangle. + */ + Mat operator()( const Rect& roi ) const + { + return Mat{*this, roi}; + } + + + /** @brief Returns a pointer to the specified matrix row. + + The methods return `uchar*` or typed pointer to the specified matrix row. See the sample in + Mat::isContinuous to know how to use these methods. + @param row Index along the dimension 0 + @param col Index along the dimension 1 + */ + uchar* ptr(int row, int col = 0) + { + return const_cast(const_cast(this)->ptr(row,col)); + } + /** @overload */ + const uchar* ptr(int row, int col = 0) const + { + return data + step * row + CV_ELEM_SIZE(type()) * col; + } + + + private: + //actual memory allocated for storage, or nullptr if object is non owning view to over memory + std::shared_ptr memory; + }; + +} //namespace own +} //namespace gapi +} //namespace cv + +#endif /* OPENCV_GAPI_OWN_MAT_HPP */ diff --git a/IPL/include/opencv/opencv2/gapi/own/saturate.hpp b/IPL/include/opencv/opencv2/gapi/own/saturate.hpp new file mode 100644 index 0000000..5b23247 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/own/saturate.hpp @@ -0,0 +1,90 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_OWN_SATURATE_HPP +#define OPENCV_GAPI_OWN_SATURATE_HPP + +#include + +#include +#include + +#include + +namespace cv { namespace gapi { namespace own { +//----------------------------- +// +// Numeric cast with saturation +// +//----------------------------- + +template +static inline DST saturate(SRC x) +{ + // only integral types please! + GAPI_DbgAssert(std::is_integral::value && + std::is_integral::value); + + if (std::is_same::value) + return static_cast(x); + + if (sizeof(DST) > sizeof(SRC)) + return static_cast(x); + + // compiler must recognize this saturation, + // so compile saturate(a + b) with adds + // instruction (e.g.: _mm_adds_epi16 if x86) + return x < std::numeric_limits::min()? + std::numeric_limits::min(): + x > std::numeric_limits::max()? + std::numeric_limits::max(): + static_cast(x); +} + +// Note, that OpenCV rounds differently: +// - like std::round() for add, subtract +// - like std::rint() for multiply, divide +template +static inline DST saturate(SRC x, R round) +{ + if (std::is_floating_point::value) + { + return static_cast(x); + } + else if (std::is_integral::value) + { + GAPI_DbgAssert(std::is_integral::value && + std::is_integral::value); + return saturate(x); + } + else + { + GAPI_DbgAssert(std::is_integral::value && + std::is_floating_point::value); +#ifdef _WIN32 +// Suppress warning about converting x to floating-point +// Note that x is already floating-point at this point +#pragma warning(disable: 4244) +#endif + int ix = static_cast(round(x)); +#ifdef _WIN32 +#pragma warning(default: 4244) +#endif + return saturate(ix); + } +} + +// explicit suffix 'd' for double type +inline double ceild(double x) { return ceil(x); } +inline double floord(double x) { return floor(x); } +inline double roundd(double x) { return round(x); } +inline double rintd(double x) { return rint(x); } + +} //namespace own +} //namespace gapi +} //namespace cv +#endif /* OPENCV_GAPI_OWN_SATURATE_HPP */ diff --git a/IPL/include/opencv/opencv2/gapi/own/scalar.hpp b/IPL/include/opencv/opencv2/gapi/own/scalar.hpp new file mode 100644 index 0000000..bda91c8 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/own/scalar.hpp @@ -0,0 +1,47 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_GAPI_OWN_SCALAR_HPP +#define OPENCV_GAPI_GAPI_OWN_SCALAR_HPP + +#include + +namespace cv +{ +namespace gapi +{ +namespace own +{ + +class GAPI_EXPORTS Scalar +{ +public: + Scalar() = default; + explicit Scalar(double v0) { val[0] = v0; }; + Scalar(double v0, double v1, double v2 = 0, double v3 = 0) + : val{v0, v1, v2, v3} + { + } + + const double& operator[](int i) const { return val[i]; } + double& operator[](int i) { return val[i]; } + + static Scalar all(double v0) { return Scalar(v0, v0, v0, v0); } + + double val[4] = {0}; +}; + +inline bool operator==(const Scalar& lhs, const Scalar& rhs) +{ + return std::equal(std::begin(lhs.val), std::end(lhs.val), std::begin(rhs.val)); +} + +} // namespace own +} // namespace gapi +} // namespace cv + +#endif // OPENCV_GAPI_GAPI_OWN_SCALAR_HPP diff --git a/IPL/include/opencv/opencv2/gapi/own/types.hpp b/IPL/include/opencv/opencv2/gapi/own/types.hpp new file mode 100644 index 0000000..20445ee --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/own/types.hpp @@ -0,0 +1,135 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_TYPES_HPP +#define OPENCV_GAPI_TYPES_HPP + +#include // std::max, std::min +#include + +namespace cv +{ +namespace gapi +{ +namespace own +{ + +class Point +{ +public: + Point() = default; + Point(int _x, int _y) : x(_x), y(_y) {}; + + int x = 0; + int y = 0; +}; + +class Rect +{ +public: + Rect() = default; + Rect(int _x, int _y, int _width, int _height) : x(_x), y(_y), width(_width), height(_height) {}; +#if !defined(GAPI_STANDALONE) + Rect(const cv::Rect& other) : x(other.x), y(other.y), width(other.width), height(other.height) {}; + inline Rect& operator=(const cv::Rect& other) + { + x = other.x; + y = other.x; + width = other.width; + height = other.height; + return *this; + } +#endif // !defined(GAPI_STANDALONE) + + int x = 0; //!< x coordinate of the top-left corner + int y = 0; //!< y coordinate of the top-left corner + int width = 0; //!< width of the rectangle + int height = 0; //!< height of the rectangle +}; + +inline bool operator==(const Rect& lhs, const Rect& rhs) +{ + return lhs.x == rhs.x && lhs.y == rhs.y && lhs.width == rhs.width && lhs.height == rhs.height; +} + +inline bool operator!=(const Rect& lhs, const Rect& rhs) +{ + return !(lhs == rhs); +} + +inline Rect& operator&=(Rect& lhs, const Rect& rhs) +{ + int x1 = std::max(lhs.x, rhs.x); + int y1 = std::max(lhs.y, rhs.y); + lhs.width = std::min(lhs.x + lhs.width, rhs.x + rhs.width) - x1; + lhs.height = std::min(lhs.y + lhs.height, rhs.y + rhs.height) - y1; + lhs.x = x1; + lhs.y = y1; + if( lhs.width <= 0 || lhs.height <= 0 ) + lhs = Rect(); + return lhs; +} + +inline const Rect operator&(const Rect& lhs, const Rect& rhs) +{ + Rect result = lhs; + return result &= rhs; +} + +inline std::ostream& operator<<(std::ostream& o, const Rect& rect) +{ + return o << "[" << rect.width << " x " << rect.height << " from (" << rect.x << ", " << rect.y << ")]"; +} + +class Size +{ +public: + Size() = default; + Size(int _width, int _height) : width(_width), height(_height) {}; +#if !defined(GAPI_STANDALONE) + Size(const cv::Size& other) : width(other.width), height(other.height) {}; + inline Size& operator=(const cv::Size& rhs) + { + width = rhs.width; + height = rhs.height; + return *this; + } +#endif // !defined(GAPI_STANDALONE) + + int width = 0; + int height = 0; +}; + +inline Size& operator+=(Size& lhs, const Size& rhs) +{ + lhs.width += rhs.width; + lhs.height += rhs.height; + return lhs; +} + +inline bool operator==(const Size& lhs, const Size& rhs) +{ + return lhs.width == rhs.width && lhs.height == rhs.height; +} + +inline bool operator!=(const Size& lhs, const Size& rhs) +{ + return !(lhs == rhs); +} + + +inline std::ostream& operator<<(std::ostream& o, const Size& s) +{ + o << "[" << s.width << " x " << s.height << "]"; + return o; +} + +} // namespace own +} // namespace gapi +} // namespace cv + +#endif // OPENCV_GAPI_TYPES_HPP diff --git a/IPL/include/opencv/opencv2/gapi/plaidml/core.hpp b/IPL/include/opencv/opencv2/gapi/plaidml/core.hpp new file mode 100644 index 0000000..47ac486 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/plaidml/core.hpp @@ -0,0 +1,20 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2019 Intel Corporation + + +#ifndef OPENCV_GAPI_PLAIDML_CORE_HPP +#define OPENCV_GAPI_PLAIDML_CORE_HPP + +#include // GKernelPackage +#include // GAPI_EXPORTS + +namespace cv { namespace gapi { namespace core { namespace plaidml { + +GAPI_EXPORTS GKernelPackage kernels(); + +}}}} + +#endif // OPENCV_GAPI_PLAIDML_CORE_HPP diff --git a/IPL/include/opencv/opencv2/gapi/plaidml/gplaidmlkernel.hpp b/IPL/include/opencv/opencv2/gapi/plaidml/gplaidmlkernel.hpp new file mode 100644 index 0000000..7ce00cf --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/plaidml/gplaidmlkernel.hpp @@ -0,0 +1,140 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2019 Intel Corporation +// + + +#ifndef OPENCV_GAPI_GPLAIDMLKERNEL_HPP +#define OPENCV_GAPI_GPLAIDMLKERNEL_HPP + +#include +#include + +namespace plaidml +{ +namespace edsl +{ + class Tensor; +} // namespace edsl +} // namespace plaidml + +namespace cv +{ +namespace gapi +{ +namespace plaidml +{ + +GAPI_EXPORTS cv::gapi::GBackend backend(); + +} // namespace plaidml +} // namespace gapi + +struct GPlaidMLContext +{ + // Generic accessor API + template + const T& inArg(int input) { return m_args.at(input).get(); } + + // Syntax sugar + const plaidml::edsl::Tensor& inTensor(int input) + { + return inArg(input); + } + + plaidml::edsl::Tensor& outTensor(int output) + { + return *(m_results.at(output).get()); + } + + std::vector m_args; + std::unordered_map m_results; +}; + +class GAPI_EXPORTS GPlaidMLKernel +{ +public: + using F = std::function; + + GPlaidMLKernel() = default; + explicit GPlaidMLKernel(const F& f) : m_f(f) {}; + + void apply(GPlaidMLContext &ctx) const + { + GAPI_Assert(m_f); + m_f(ctx); + } + +protected: + F m_f; +}; + + +namespace detail +{ + +template struct plaidml_get_in; +template<> struct plaidml_get_in +{ + static const plaidml::edsl::Tensor& get(GPlaidMLContext& ctx, int idx) + { + return ctx.inTensor(idx); + } +}; + +template struct plaidml_get_in +{ + static T get(GPlaidMLContext &ctx, int idx) { return ctx.inArg(idx); } +}; + +template struct plaidml_get_out; +template<> struct plaidml_get_out +{ + static plaidml::edsl::Tensor& get(GPlaidMLContext& ctx, int idx) + { + return ctx.outTensor(idx); + } +}; + +template +struct PlaidMLCallHelper; + +template +struct PlaidMLCallHelper, std::tuple > +{ + template + static void call_impl(GPlaidMLContext &ctx, detail::Seq, detail::Seq) + { + Impl::run(plaidml_get_in::get(ctx, IIs)..., plaidml_get_out::get(ctx, OIs)...); + } + + static void call(GPlaidMLContext& ctx) + { + call_impl(ctx, + typename detail::MkSeq::type(), + typename detail::MkSeq::type()); + } +}; + +} // namespace detail + +template +class GPlaidMLKernelImpl: public cv::detail::PlaidMLCallHelper, + public cv::detail::KernelTag +{ + using P = detail::PlaidMLCallHelper; + +public: + using API = K; + + static cv::gapi::GBackend backend() { return cv::gapi::plaidml::backend(); } + static cv::GPlaidMLKernel kernel() { return GPlaidMLKernel(&P::call); } +}; + +#define GAPI_PLAIDML_KERNEL(Name, API) struct Name: public cv::GPlaidMLKernelImpl + +} // namespace cv + +#endif // OPENCV_GAPI_GPLAIDMLKERNEL_HPP diff --git a/IPL/include/opencv/opencv2/gapi/plaidml/plaidml.hpp b/IPL/include/opencv/opencv2/gapi/plaidml/plaidml.hpp new file mode 100644 index 0000000..bd12d25 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/plaidml/plaidml.hpp @@ -0,0 +1,48 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2019 Intel Corporation + + +#ifndef OPENCV_GAPI_PLAIDML_PLAIDML_HPP +#define OPENCV_GAPI_PLAIDML_PLAIDML_HPP + +#include +#include // CompileArgTag + +namespace cv +{ +namespace gapi +{ +namespace plaidml +{ + +/** \addtogroup gapi_compile_args + * @{ + */ +/** + * @brief This structure represents the basic parameters for the experimental + * PlaidML backend. + */ +struct config +{ + std::string dev_id; //!< Device ID. Refer to PlaidML documentation for details. + std::string trg_id; //!< Target ID. Refer to PlaidML documentation for details. +}; +/** @} gapi_compile_args */ + +} // namespace plaidml +} // namespace gapi + +namespace detail +{ + template<> struct CompileArgTag + { + static const char* tag() { return "gapi.plaidml.config"; } + }; +} // namespace detail + +} // namespace cv + +#endif // OPENCV_GAPI_PLAIDML_PLAIDML_HPP diff --git a/IPL/include/opencv/opencv2/gapi/render.hpp b/IPL/include/opencv/opencv2/gapi/render.hpp new file mode 100644 index 0000000..52e55b0 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/render.hpp @@ -0,0 +1,14 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2019 Intel Corporation + +#ifndef OPENCV_GAPI_RENDER_ROOT_HPP +#define OPENCV_GAPI_RENDER_ROOT_HPP + +// This file is just a shortcut to render/render.hpp + +#include + +#endif // OPENCV_GAPI_RENDER_ROOT_HPP diff --git a/IPL/include/opencv/opencv2/gapi/render/render.hpp b/IPL/include/opencv/opencv2/gapi/render/render.hpp new file mode 100644 index 0000000..fcb69cb --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/render/render.hpp @@ -0,0 +1,469 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018-2019 Intel Corporation + + +#ifndef OPENCV_GAPI_RENDER_HPP +#define OPENCV_GAPI_RENDER_HPP + +#include +#include + +#include +#include + +#include +#include +#include + + +/** \defgroup gapi_draw G-API Drawing and composition functionality + * @{ + * + * @brief Functions for in-graph drawing. + * + * @note This is a Work in Progress functionality and APIs may + * change in the future releases. + * + * G-API can do some in-graph drawing with a generic operations and a + * set of [rendering primitives](@ref gapi_draw_prims). + * In contrast with traditional OpenCV, in G-API user need to form a + * *rendering list* of primitives to draw. This list can be built + * manually or generated within a graph. This list is passed to + * [special operations or functions](@ref gapi_draw_api) where all + * primitives are interpreted and applied to the image. + * + * For example, in a complex pipeline a list of detected objects + * can be translated in-graph to a list of cv::gapi::wip::draw::Rect + * primitives to highlight those with bounding boxes, or a list of + * detected faces can be translated in-graph to a list of + * cv::gapi::wip::draw::Mosaic primitives to hide sensitive content + * or protect privacy. + * + * Like any other operations, rendering in G-API can be reimplemented + * by different backends. Currently only an OpenCV-based backend is + * available. + * + * In addition to the graph-level operations, there are also regular + * (immediate) OpenCV-like functions are available -- see + * cv::gapi::wip::draw::render(). These functions are just wrappers + * over regular G-API and build the rendering graphs on the fly, so + * take compilation arguments as parameters. + * + * Currently this API is more machine-oriented than human-oriented. + * The main purpose is to translate a set of domain-specific objects + * to a list of primitives to draw. For example, in order to generate + * a picture like this: + * + * ![](modules/gapi/doc/pics/render_example.png) + * + * Rendering list needs to be generated as follows: + * + * @include modules/gapi/samples/draw_example.cpp + * + * @defgroup gapi_draw_prims Drawing primitives + * @defgroup gapi_draw_api Drawing operations and functions + * @} + */ + +namespace cv +{ +namespace gapi +{ +namespace wip +{ +namespace draw +{ + +/** + * @brief This structure specifies which FreeType font to use by FText primitives. + */ +struct freetype_font +{ + /*@{*/ + std::string path; //!< The path to the font file (.ttf) + /*@{*/ +}; + +//! @addtogroup gapi_draw_prims +//! @{ +/** + * @brief This structure represents a text string to draw. + * + * Parameters match cv::putText(). + */ +struct Text +{ + /** + * @brief Text constructor + * + * @param text_ The text string to be drawn + * @param org_ The bottom-left corner of the text string in the image + * @param ff_ The font type, see #HersheyFonts + * @param fs_ The font scale factor that is multiplied by the font-specific base size + * @param color_ The text color + * @param thick_ The thickness of the lines used to draw a text + * @param lt_ The line type. See #LineTypes + * @param bottom_left_origin_ When true, the image data origin is at the bottom-left corner. Otherwise, it is at the top-left corner + */ + Text(const std::string& text_, + const cv::Point& org_, + int ff_, + double fs_, + const cv::Scalar& color_, + int thick_ = 1, + int lt_ = cv::LINE_8, + bool bottom_left_origin_ = false) : + text(text_), org(org_), ff(ff_), fs(fs_), + color(color_), thick(thick_), lt(lt_), bottom_left_origin(bottom_left_origin_) + { + } + + /*@{*/ + std::string text; //!< The text string to be drawn + cv::Point org; //!< The bottom-left corner of the text string in the image + int ff; //!< The font type, see #HersheyFonts + double fs; //!< The font scale factor that is multiplied by the font-specific base size + cv::Scalar color; //!< The text color + int thick; //!< The thickness of the lines used to draw a text + int lt; //!< The line type. See #LineTypes + bool bottom_left_origin; //!< When true, the image data origin is at the bottom-left corner. Otherwise, it is at the top-left corner + /*@{*/ +}; + +/** + * @brief This structure represents a text string to draw using + * FreeType renderer. + * + * If OpenCV is built without FreeType support, this primitive will + * fail at the execution stage. + */ +struct FText +{ + /** + * @brief FText constructor + * + * @param text_ The text string to be drawn + * @param org_ The bottom-left corner of the text string in the image + * @param fh_ The height of text + * @param color_ The text color + */ + FText(const std::wstring& text_, + const cv::Point& org_, + int fh_, + const cv::Scalar& color_) : + text(text_), org(org_), fh(fh_), color(color_) + { + } + + /*@{*/ + std::wstring text; //!< The text string to be drawn + cv::Point org; //!< The bottom-left corner of the text string in the image + int fh; //!< The height of text + cv::Scalar color; //!< The text color + /*@{*/ +}; + +/** + * @brief This structure represents a rectangle to draw. + * + * Parameters match cv::rectangle(). + */ +struct Rect +{ + /** + * @brief Rect constructor + * + * @param rect_ Coordinates of the rectangle + * @param color_ The bottom-left corner of the text string in the image + * @param thick_ The thickness of lines that make up the rectangle. Negative values, like #FILLED, mean that the function has to draw a filled rectangle + * @param lt_ The type of the line. See #LineTypes + * @param shift_ The number of fractional bits in the point coordinates + */ + Rect(const cv::Rect& rect_, + const cv::Scalar& color_, + int thick_ = 1, + int lt_ = cv::LINE_8, + int shift_ = 0) : + rect(rect_), color(color_), thick(thick_), lt(lt_), shift(shift_) + { + } + + /*@{*/ + cv::Rect rect; //!< Coordinates of the rectangle + cv::Scalar color; //!< The rectangle color or brightness (grayscale image) + int thick; //!< The thickness of lines that make up the rectangle. Negative values, like #FILLED, mean that the function has to draw a filled rectangle + int lt; //!< The type of the line. See #LineTypes + int shift; //!< The number of fractional bits in the point coordinates + /*@{*/ +}; + +/** + * @brief This structure represents a circle to draw. + * + * Parameters match cv::circle(). + */ +struct Circle +{ + /** + * @brief Circle constructor + * + * @param center_ The center of the circle + * @param radius_ The radius of the circle + * @param color_ The color of the circle + * @param thick_ The thickness of the circle outline, if positive. Negative values, like #FILLED, mean that a filled circle is to be drawn + * @param lt_ The Type of the circle boundary. See #LineTypes + * @param shift_ The Number of fractional bits in the coordinates of the center and in the radius value + */ + Circle(const cv::Point& center_, + int radius_, + const cv::Scalar& color_, + int thick_ = 1, + int lt_ = cv::LINE_8, + int shift_ = 0) : + center(center_), radius(radius_), color(color_), thick(thick_), lt(lt_), shift(shift_) + { + } + + /*@{*/ + cv::Point center; //!< The center of the circle + int radius; //!< The radius of the circle + cv::Scalar color; //!< The color of the circle + int thick; //!< The thickness of the circle outline, if positive. Negative values, like #FILLED, mean that a filled circle is to be drawn + int lt; //!< The Type of the circle boundary. See #LineTypes + int shift; //!< The Number of fractional bits in the coordinates of the center and in the radius value + /*@{*/ +}; + +/** + * @brief This structure represents a line to draw. + * + * Parameters match cv::line(). + */ +struct Line +{ + /** + * @brief Line constructor + * + * @param pt1_ The first point of the line segment + * @param pt2_ The second point of the line segment + * @param color_ The line color + * @param thick_ The thickness of line + * @param lt_ The Type of the line. See #LineTypes + * @param shift_ The number of fractional bits in the point coordinates + */ + Line(const cv::Point& pt1_, + const cv::Point& pt2_, + const cv::Scalar& color_, + int thick_ = 1, + int lt_ = cv::LINE_8, + int shift_ = 0) : + pt1(pt1_), pt2(pt2_), color(color_), thick(thick_), lt(lt_), shift(shift_) + { + } + + /*@{*/ + cv::Point pt1; //!< The first point of the line segment + cv::Point pt2; //!< The second point of the line segment + cv::Scalar color; //!< The line color + int thick; //!< The thickness of line + int lt; //!< The Type of the line. See #LineTypes + int shift; //!< The number of fractional bits in the point coordinates + /*@{*/ +}; + +/** + * @brief This structure represents a mosaicing operation. + * + * Mosaicing is a very basic method to obfuscate regions in the image. + */ +struct Mosaic +{ + /** + * @brief Mosaic constructor + * + * @param mos_ Coordinates of the mosaic + * @param cellSz_ Cell size (same for X, Y) + * @param decim_ Decimation (0 stands for no decimation) + */ + Mosaic(const cv::Rect& mos_, + int cellSz_, + int decim_) : + mos(mos_), cellSz(cellSz_), decim(decim_) + { + } + + /*@{*/ + cv::Rect mos; //!< Coordinates of the mosaic + int cellSz; //!< Cell size (same for X, Y) + int decim; //!< Decimation (0 stands for no decimation) + /*@{*/ +}; + +/** + * @brief This structure represents an image to draw. + * + * Image is blended on a frame using the specified mask. + */ +struct Image +{ + /** + * @brief Mosaic constructor + * + * @param org_ The bottom-left corner of the image + * @param img_ Image to draw + * @param alpha_ Alpha channel for image to draw (same size and number of channels) + */ + Image(const cv::Point& org_, + const cv::Mat& img_, + const cv::Mat& alpha_) : + org(org_), img(img_), alpha(alpha_) + { + } + + /*@{*/ + cv::Point org; //!< The bottom-left corner of the image + cv::Mat img; //!< Image to draw + cv::Mat alpha; //!< Alpha channel for image to draw (same size and number of channels) + /*@{*/ +}; + +/** + * @brief This structure represents a polygon to draw. + */ +struct Poly +{ + /** + * @brief Mosaic constructor + * + * @param points_ Points to connect + * @param color_ The line color + * @param thick_ The thickness of line + * @param lt_ The Type of the line. See #LineTypes + * @param shift_ The number of fractional bits in the point coordinate + */ + Poly(const std::vector& points_, + const cv::Scalar& color_, + int thick_ = 1, + int lt_ = cv::LINE_8, + int shift_ = 0) : + points(points_), color(color_), thick(thick_), lt(lt_), shift(shift_) + { + } + + /*@{*/ + std::vector points; //!< Points to connect + cv::Scalar color; //!< The line color + int thick; //!< The thickness of line + int lt; //!< The Type of the line. See #LineTypes + int shift; //!< The number of fractional bits in the point coordinate + /*@{*/ +}; + +using Prim = util::variant + < Text + , FText + , Rect + , Circle + , Line + , Mosaic + , Image + , Poly + >; + +using Prims = std::vector; +//! @} gapi_draw_prims + +using GMat2 = std::tuple; +using GMatDesc2 = std::tuple; + + +//! @addtogroup gapi_draw_api +//! @{ +/** @brief The function renders on the input image passed drawing primitivies + +@param bgr input image: 8-bit unsigned 3-channel image @ref CV_8UC3. +@param prims vector of drawing primitivies +@param args graph compile time parameters +*/ +void GAPI_EXPORTS render(cv::Mat& bgr, + const Prims& prims, + cv::GCompileArgs&& args = {}); + +/** @brief The function renders on two NV12 planes passed drawing primitivies + +@param y_plane input image: 8-bit unsigned 1-channel image @ref CV_8UC1. +@param uv_plane input image: 8-bit unsigned 2-channel image @ref CV_8UC2. +@param prims vector of drawing primitivies +@param args graph compile time parameters +*/ +void GAPI_EXPORTS render(cv::Mat& y_plane, + cv::Mat& uv_plane, + const Prims& prims, + cv::GCompileArgs&& args = {}); + +G_TYPED_KERNEL_M(GRenderNV12, )>, "org.opencv.render.nv12") +{ + static GMatDesc2 outMeta(GMatDesc y_plane, GMatDesc uv_plane, GArrayDesc) + { + return std::make_tuple(y_plane, uv_plane); + } +}; + +G_TYPED_KERNEL(GRenderBGR, )>, "org.opencv.render.bgr") +{ + static GMatDesc outMeta(GMatDesc bgr, GArrayDesc) + { + return bgr; + } +}; + +/** @brief Renders on 3 channels input + +Output image must be 8-bit unsigned planar 3-channel image + +@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3 +@param prims draw primitives +*/ +GAPI_EXPORTS GMat render3ch(const GMat& src, const GArray& prims); + +/** @brief Renders on two planes + +Output y image must be 8-bit unsigned planar 1-channel image @ref CV_8UC1 +uv image must be 8-bit unsigned planar 2-channel image @ref CV_8UC2 + +@param y input image: 8-bit unsigned 1-channel image @ref CV_8UC1 +@param uv input image: 8-bit unsigned 2-channel image @ref CV_8UC2 +@param prims draw primitives +*/ +GAPI_EXPORTS GMat2 renderNV12(const GMat& y, + const GMat& uv, + const GArray& prims); +//! @} gapi_draw_api + +} // namespace draw +} // namespace wip + +namespace render +{ +namespace ocv +{ + GAPI_EXPORTS cv::gapi::GKernelPackage kernels(); + +} // namespace ocv +} // namespace render +} // namespace gapi + +namespace detail +{ + template<> struct CompileArgTag + { + static const char* tag() { return "gapi.freetype_font"; } + }; +} // namespace detail + +} // namespace cv + +#endif // OPENCV_GAPI_RENDER_HPP diff --git a/IPL/include/opencv/opencv2/gapi/streaming/cap.hpp b/IPL/include/opencv/opencv2/gapi/streaming/cap.hpp new file mode 100644 index 0000000..faa5550 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/streaming/cap.hpp @@ -0,0 +1,110 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2019 Intel Corporation + +#ifndef OPENCV_GAPI_STREAMING_CAP_HPP +#define OPENCV_GAPI_STREAMING_CAP_HPP + +/** + * YOUR ATTENTION PLEASE! + * + * This is a header-only implementation of cv::VideoCapture-based + * Stream source. It is not built by default with G-API as G-API + * doesn't depend on videoio module. + * + * If you want to use it in your application, please make sure + * videioio is available in your OpenCV package and is linked to your + * application. + * + * Note for developers: please don't put videoio dependency in G-API + * because of this file. + */ + +#include +#include + +namespace cv { +namespace gapi { +namespace wip { + +/** + * @brief OpenCV's VideoCapture-based streaming source. + * + * This class implements IStreamSource interface. + * Its constructor takes the same parameters as cv::VideoCapture does. + * + * Please make sure that videoio OpenCV module is available before using + * this in your application (G-API doesn't depend on it directly). + * + * @note stream sources are passed to G-API via shared pointers, so + * please gapi::make_src<> to create objects and ptr() to pass a + * GCaptureSource to cv::gin(). + */ +class GCaptureSource: public IStreamSource +{ +public: + explicit GCaptureSource(int id) : cap(id) { prep(); } + explicit GCaptureSource(const std::string &path) : cap(path) { prep(); } + + // TODO: Add more constructor overloads to make it + // fully compatible with VideoCapture's interface. + +protected: + cv::VideoCapture cap; + cv::Mat first; + bool first_pulled = false; + + void prep() + { + // Prepare first frame to report its meta to engine + // when needed + GAPI_Assert(first.empty()); + cv::Mat tmp; + if (!cap.read(tmp)) + { + GAPI_Assert(false && "Couldn't grab the very first frame"); + } + // NOTE: Some decode/media VideoCapture backends continue + // owning the video buffer under cv::Mat so in order to + // process it safely in a highly concurrent pipeline, clone() + // is the only right way. + first = tmp.clone(); + } + + virtual bool pull(cv::gapi::wip::Data &data) override + { + if (!first_pulled) + { + GAPI_Assert(!first.empty()); + first_pulled = true; + data = first; // no need to clone here since it was cloned already + return true; + } + + if (!cap.isOpened()) return false; + + cv::Mat frame; + if (!cap.read(frame)) + { + // end-of-stream happened + return false; + } + // Same reason to clone as in prep() + data = frame.clone(); + return true; + } + + virtual GMetaArg descr_of() const override + { + GAPI_Assert(!first.empty()); + return cv::GMetaArg{cv::descr_of(first)}; + } +}; + +} // namespace wip +} // namespace gapi +} // namespace cv + +#endif // OPENCV_GAPI_STREAMING_CAP_HPP diff --git a/IPL/include/opencv/opencv2/gapi/streaming/source.hpp b/IPL/include/opencv/opencv2/gapi/streaming/source.hpp new file mode 100644 index 0000000..6597cad --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/streaming/source.hpp @@ -0,0 +1,62 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2019 Intel Corporation + +#ifndef OPENCV_GAPI_STREAMING_SOURCE_HPP +#define OPENCV_GAPI_STREAMING_SOURCE_HPP + +#include // shared_ptr +#include // is_base_of + +#include // GMetaArg + + +namespace cv { +namespace gapi { +namespace wip { + struct Data; // "forward-declaration" of GRunArg + +/** + * @brief Abstract streaming pipeline source. + * + * Implement this interface if you want customize the way how data is + * streaming into GStreamingCompiled. + * + * Objects implementing this interface can be passed to + * GStreamingCompiled using setSource() with cv::gin(). Regular + * compiled graphs (GCompiled) don't support input objects of this + * type. + * + * Default cv::VideoCapture-based implementation is available, see + * cv::gapi::wip::GCaptureSource. + * + * @note stream sources are passed to G-API via shared pointers, so + * please use ptr() when passing a IStreamSource implementation to + * cv::gin(). + */ +class IStreamSource: public std::enable_shared_from_this +{ +public: + using Ptr = std::shared_ptr; + Ptr ptr() { return shared_from_this(); } + virtual bool pull(Data &data) = 0; + virtual GMetaArg descr_of() const = 0; + virtual ~IStreamSource() = default; +}; + +template +IStreamSource::Ptr inline make_src(Args&&... args) +{ + static_assert(std::is_base_of::value, + "T must implement the cv::gapi::IStreamSource interface!"); + auto src_ptr = std::make_shared(std::forward(args)...); + return src_ptr->ptr(); +} + +} // namespace wip +} // namespace gapi +} // namespace cv + +#endif // OPENCV_GAPI_STREAMING_SOURCE_HPP diff --git a/IPL/include/opencv/opencv2/gapi/util/any.hpp b/IPL/include/opencv/opencv2/gapi/util/any.hpp new file mode 100644 index 0000000..5f97e95 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/util/any.hpp @@ -0,0 +1,186 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_UTIL_ANY_HPP +#define OPENCV_GAPI_UTIL_ANY_HPP + +#include +#include +#include +#include + +#include + +#if defined(_MSC_VER) + // disable MSVC warning on "multiple copy constructors specified" +# pragma warning(disable: 4521) +#endif + +namespace cv +{ + +namespace internal +{ + template + T down_cast(Source operand) + { +#if defined(__GXX_RTTI) || defined(_CPPRTTI) + return dynamic_cast(operand); +#else + #warning used static cast instead of dynamic because RTTI is disabled + return static_cast(operand); +#endif + } +} + +namespace util +{ + class bad_any_cast : public std::bad_cast + { + public: + virtual const char* what() const noexcept override + { + return "Bad any cast"; + } + }; + + //modeled against C++17 std::any + + class any + { + private: + struct holder; + using holder_ptr = std::unique_ptr; + struct holder + { + virtual holder_ptr clone() = 0; + virtual ~holder() = default; + }; + + template + struct holder_impl : holder + { + value_t v; + template + holder_impl(arg_t&& a) : v(std::forward(a)) {} + holder_ptr clone() override { return holder_ptr(new holder_impl (v));} + }; + + holder_ptr hldr; + public: + template + any(value_t&& arg) : hldr(new holder_impl::type>( std::forward(arg))) {} + + any(any const& src) : hldr( src.hldr ? src.hldr->clone() : nullptr) {} + //simple hack in order not to write enable_if for the template constructor + any(any & src) : any (const_cast(src)) {} + + any() = default; + any(any&& ) = default; + + any& operator=(any&&) = default; + + any& operator=(any const& src) + { + any copy(src); + swap(*this, copy); + return *this; + } + + template + friend value_t* any_cast(any* operand); + + template + friend const value_t* any_cast(const any* operand); + + template + friend value_t& unsafe_any_cast(any& operand); + + template + friend const value_t& unsafe_any_cast(const any& operand); + + friend void swap(any & lhs, any& rhs) + { + swap(lhs.hldr, rhs.hldr); + } + + }; + + template + value_t* any_cast(any* operand) + { + auto casted = internal::down_cast::type> *>(operand->hldr.get()); + if (casted){ + return & (casted->v); + } + return nullptr; + } + + template + const value_t* any_cast(const any* operand) + { + auto casted = internal::down_cast::type> *>(operand->hldr.get()); + if (casted){ + return & (casted->v); + } + return nullptr; + } + + template + value_t& any_cast(any& operand) + { + auto ptr = any_cast(&operand); + if (ptr) + { + return *ptr; + } + + throw_error(bad_any_cast()); + } + + + template + const value_t& any_cast(const any& operand) + { + auto ptr = any_cast(&operand); + if (ptr) + { + return *ptr; + } + + throw_error(bad_any_cast()); + } + + template + inline value_t& unsafe_any_cast(any& operand) + { +#ifdef DEBUG + return any_cast(operand); +#else + return static_cast::type> *>(operand.hldr.get())->v; +#endif + } + + template + inline const value_t& unsafe_any_cast(const any& operand) + { +#ifdef DEBUG + return any_cast(operand); +#else + return static_cast::type> *>(operand.hldr.get())->v; +#endif + } + +} // namespace util +} // namespace cv + +#if defined(_MSC_VER) + // Enable "multiple copy constructors specified" back +# pragma warning(default: 4521) +#endif + +#endif // OPENCV_GAPI_UTIL_ANY_HPP diff --git a/IPL/include/opencv/opencv2/gapi/util/compiler_hints.hpp b/IPL/include/opencv/opencv2/gapi/util/compiler_hints.hpp new file mode 100644 index 0000000..a41a971 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/util/compiler_hints.hpp @@ -0,0 +1,19 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + +#ifndef OPENCV_GAPI_UTIL_COMPILER_HINTS_HPP +#define OPENCV_GAPI_UTIL_COMPILER_HINTS_HPP + +namespace cv +{ +namespace util +{ + //! Utility template function to prevent "unused" warnings by various compilers. + template void suppress_unused_warning( const T& ) {} +} // namespace util +} // namespace cv + +#endif /* OPENCV_GAPI_UTIL_COMPILER_HINTS_HPP */ diff --git a/IPL/include/opencv/opencv2/gapi/util/optional.hpp b/IPL/include/opencv/opencv2/gapi/util/optional.hpp new file mode 100644 index 0000000..1aa2b26 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/util/optional.hpp @@ -0,0 +1,178 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_UTIL_OPTIONAL_HPP +#define OPENCV_GAPI_UTIL_OPTIONAL_HPP + +#include + +// A poor man's `optional` implementation, incompletely modeled against C++17 spec. +namespace cv +{ +namespace util +{ + class bad_optional_access: public std::exception + { + public: + virtual const char *what() const noexcept override + { + return "Bad optional access"; + } + }; + + // TODO: nullopt_t + + // Interface /////////////////////////////////////////////////////////////// + template class optional + { + public: + // Constructors + // NB.: there were issues with Clang 3.8 when =default() was used + // instead {} + optional() {}; + optional(const optional&) = default; + explicit optional(T &&value) noexcept; + explicit optional(const T &value) noexcept; + optional(optional &&) noexcept; + // TODO: optional(nullopt_t) noexcept; + // TODO: optional(const optional &) + // TODO: optional(optional &&) + // TODO: optional(Args&&...) + // TODO: optional(initializer_list) + // TODO: optional(U&& value); + + // Assignment + optional& operator=(const optional& rhs) = default; + optional& operator=(optional&& rhs); + + // Observers + T* operator-> (); + const T* operator-> () const; + T& operator* (); + const T& operator* () const; + // TODO: && versions + + operator bool() const noexcept; + bool has_value() const noexcept; + + T& value(); + const T& value() const; + // TODO: && versions + + template + T value_or(U &&default_value) const; + + void swap(optional &other) noexcept; + void reset() noexcept; + // TODO: emplace + + // TODO: operator==, !=, <, <=, >, >= + + private: + struct nothing {}; + util::variant m_holder; + }; + + template + optional::type> make_optional(T&& value); + + // TODO: Args... and initializer_list versions + + // Implementation ////////////////////////////////////////////////////////// + template optional::optional(T &&v) noexcept + : m_holder(v) + { + } + + template optional::optional(const T &v) noexcept + : m_holder(v) + { + } + + template optional::optional(optional&& rhs) noexcept + : m_holder(std::move(rhs.m_holder)) + { + rhs.reset(); + } + + template optional& optional::operator=(optional&& rhs) + { + m_holder = std::move(rhs.m_holder); + rhs.reset(); + return *this; + } + + template T* optional::operator-> () + { + return & *(*this); + } + + template const T* optional::operator-> () const + { + return & *(*this); + } + + template T& optional::operator* () + { + return this->value(); + } + + template const T& optional::operator* () const + { + return this->value(); + } + + template optional::operator bool() const noexcept + { + return this->has_value(); + } + + template bool optional::has_value() const noexcept + { + return util::holds_alternative(m_holder); + } + + template T& optional::value() + { + if (!this->has_value()) + throw_error(bad_optional_access()); + return util::get(m_holder); + } + + template const T& optional::value() const + { + if (!this->has_value()) + throw_error(bad_optional_access()); + return util::get(m_holder); + } + + template + template T optional::value_or(U &&default_value) const + { + return (this->has_value() ? this->value() : T(default_value)); + } + + template void optional::swap(optional &other) noexcept + { + m_holder.swap(other.m_holder); + } + + template void optional::reset() noexcept + { + if (this->has_value()) + m_holder = nothing{}; + } + + template + optional::type> make_optional(T&& value) + { + return optional::type>(std::forward(value)); + } +} // namespace util +} // namespace cv + +#endif // OPENCV_GAPI_UTIL_OPTIONAL_HPP diff --git a/IPL/include/opencv/opencv2/gapi/util/throw.hpp b/IPL/include/opencv/opencv2/gapi/util/throw.hpp new file mode 100644 index 0000000..689bf58 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/util/throw.hpp @@ -0,0 +1,36 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_UTIL_THROW_HPP +#define OPENCV_GAPI_UTIL_THROW_HPP + +#include // std::forward + +#if !defined(__EXCEPTIONS) +#include +#include +#endif + +namespace cv +{ +namespace util +{ +template +[[noreturn]] void throw_error(ExceptionType &&e) +{ +#if defined(__EXCEPTIONS) || defined(_CPPUNWIND) + throw std::forward(e); +#else + fprintf(stderr, "An exception thrown! %s\n" , e.what()); + fflush(stderr); + abort(); +#endif +} +} // namespace util +} // namespace cv + +#endif // OPENCV_GAPI_UTIL_THROW_HPP diff --git a/IPL/include/opencv/opencv2/gapi/util/util.hpp b/IPL/include/opencv/opencv2/gapi/util/util.hpp new file mode 100644 index 0000000..afcf559 --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/util/util.hpp @@ -0,0 +1,124 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018-2019 Intel Corporation + + +#ifndef OPENCV_GAPI_UTIL_HPP +#define OPENCV_GAPI_UTIL_HPP + +#include + +// \cond HIDDEN_SYMBOLS +// This header file contains some generic utility functions which are +// used in other G-API Public API headers. +// +// PLEASE don't put any stuff here if it is NOT used in public API headers! + +namespace cv +{ +namespace detail +{ + // Recursive integer sequence type, useful for enumerating elements of + // template parameter packs. + template struct Seq { using next = Seq; }; + template struct MkSeq { using type = typename MkSeq::type::next; }; + template<> struct MkSeq<0>{ using type = Seq<>; }; + + // Checks if elements of variadic template satisfy the given Predicate. + // Implemented via tuple, with an interface to accept plain type lists + template class, typename, typename...> struct all_satisfy; + + template class F, typename T, typename... Ts> + struct all_satisfy > + { + static const constexpr bool value = F::value + && all_satisfy >::value; + }; + template class F, typename T> + struct all_satisfy > + { + static const constexpr bool value = F::value; + }; + + template class F, typename T, typename... Ts> + struct all_satisfy: public all_satisfy > {}; + + // Permute given tuple type C with given integer sequence II + // Sequence may be less than tuple C size. + template struct permute_tuple; + + template + struct permute_tuple > + { + using type = std::tuple< typename std::tuple_element::type... >; + }; + + // Given T..., generates a type sequence of sizeof...(T)-1 elements + // which is T... without its last element + // Implemented via tuple, with an interface to accept plain type lists + template struct all_but_last; + + template + struct all_but_last > + { + using C = std::tuple; + using S = typename MkSeq::value - 1>::type; + using type = typename permute_tuple::type; + }; + + template + struct all_but_last: public all_but_last > {}; + + template + using all_but_last_t = typename all_but_last::type; + + // NB.: This is here because there's no constexpr std::max in C++11 + template struct max_of_t + { + static constexpr const std::size_t rest = max_of_t::value; + static constexpr const std::size_t value = rest > S0 ? rest : S0; + }; + template struct max_of_t + { + static constexpr const std::size_t value = S; + }; + + template + struct contains : std::false_type{}; + + template + struct contains : std::integral_constant::value || + contains::value> {}; + template + struct contains> : std::integral_constant::value> {}; + + template + struct all_unique : std::true_type{}; + + template + struct all_unique : std::integral_constant::value && + all_unique::value> {}; + + template + struct tuple_wrap_helper; + + template struct tuple_wrap_helper + { + using type = std::tuple; + static type get(T&& obj) { return std::make_tuple(std::move(obj)); } + }; + + template + struct tuple_wrap_helper> + { + using type = std::tuple; + static type get(std::tuple&& objs) { return std::forward>(objs); } + }; +} // namespace detail +} // namespace cv + +// \endcond + +#endif // OPENCV_GAPI_UTIL_HPP diff --git a/IPL/include/opencv/opencv2/gapi/util/variant.hpp b/IPL/include/opencv/opencv2/gapi/util/variant.hpp new file mode 100644 index 0000000..22dfb2e --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/util/variant.hpp @@ -0,0 +1,392 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018 Intel Corporation + + +#ifndef OPENCV_GAPI_UTIL_VARIANT_HPP +#define OPENCV_GAPI_UTIL_VARIANT_HPP + +#include +#include + +#include +#include // max_of_t + +// A poor man's `variant` implementation, incompletely modeled against C++17 spec. +namespace cv +{ +namespace util +{ + namespace detail + { + template + struct type_list_index_helper + { + static const constexpr bool is_same = std::is_same::value; + static const constexpr std::size_t value = + std::conditional, type_list_index_helper>::type::value; + }; + + template + struct type_list_index_helper + { + static_assert(std::is_same::value, "Type not found"); + static const constexpr std::size_t value = I; + }; + + template< bool B, class T = void > + using enable_if_t = typename std::enable_if::type; + + template + using are_different_t = enable_if_t< + !std::is_same::type, + typename std::decay::type>::value, + V>; + } + + template + struct type_list_index + { + static const constexpr std::size_t value = detail::type_list_index_helper<0, Target, Types...>::value; + }; + + class bad_variant_access: public std::exception + { + public: + virtual const char *what() const noexcept override + { + return "Bad variant access"; + } + }; + + // Interface /////////////////////////////////////////////////////////////// + struct monostate {}; + inline bool operator==(const util::monostate&, const util::monostate&) + { + return true; + } + + template // FIXME: no references, arrays, and void + class variant + { + // FIXME: Replace with std::aligned_union after gcc4.8 support is dropped + static constexpr const std::size_t S = cv::detail::max_of_t::value; + static constexpr const std::size_t A = cv::detail::max_of_t::value; + using Memory = typename std::aligned_storage::type[1]; + + template struct cctr_h { + static void help(Memory memory, const Memory from) { + new (memory) T(*reinterpret_cast(from)); + } + }; + + template struct vctr_h { + static void help(Memory memory, const void* pval) { + new (memory) T(*reinterpret_cast(pval)); + } + }; + + template struct mctr_h { + static void help(Memory memory, void *pval) { + new (memory) T(std::move(*reinterpret_cast(pval))); + } + }; + + template struct copy_h { + static void help(Memory to, const Memory from) { + *reinterpret_cast(to) = *reinterpret_cast(from); + } + }; + + template struct move_h { + static void help(Memory to, const Memory from) { + *reinterpret_cast(to) = std::move(*reinterpret_cast(from)); + } + }; + + template struct swap_h { + static void help(Memory to, Memory from) { + std::swap(*reinterpret_cast(to), *reinterpret_cast(from)); + } + }; + + template struct dtor_h { + static void help(Memory memory) { + (void) memory; // MSCV warning + reinterpret_cast(memory)->~T(); + } + }; + + template struct equal_h { + static bool help(const Memory lhs, const Memory rhs) { + const T& t_lhs = *reinterpret_cast(lhs); + const T& t_rhs = *reinterpret_cast(rhs); + return t_lhs == t_rhs; + } + }; + + typedef void (*CCtr) (Memory, const Memory); // Copy c-tor (variant) + typedef void (*VCtr) (Memory, const void*); // Copy c-tor (value) + typedef void (*MCtr) (Memory, void*); // Generic move c-tor + typedef void (*Copy) (Memory, const Memory); // Copy assignment + typedef void (*Move) (Memory, const Memory); // Move assignment + typedef void (*Swap) (Memory, Memory); // Swap + typedef void (*Dtor) (Memory); // Destructor + + typedef bool (*Equal)(const Memory, const Memory); // Equality test (external) + + static constexpr std::array cctrs(){ return {{(&cctr_h::help)...}};} + static constexpr std::array vctrs(){ return {{(&vctr_h::help)...}};} + static constexpr std::array mctrs(){ return {{(&mctr_h::help)...}};} + static constexpr std::array cpyrs(){ return {{(©_h::help)...}};} + static constexpr std::array mvers(){ return {{(&move_h::help)...}};} + static constexpr std::array swprs(){ return {{(&swap_h::help)...}};} + static constexpr std::array dtors(){ return {{(&dtor_h::help)...}};} + + std::size_t m_index = 0; + + protected: + template friend T& get(variant &v); + template friend const T& get(const variant &v); + template friend bool operator==(const variant &lhs, + const variant &rhs); + Memory memory; + + public: + // Constructors + variant() noexcept; + variant(const variant& other); + variant(variant&& other) noexcept; + template explicit variant(const T& t); + // are_different_t is a SFINAE trick to avoid variant(T &&t) with T=variant + // for some reason, this version is called instead of variant(variant&& o) when + // variant is used in STL containers (examples: vector assignment). + // detail::enable_if_t::value> is a SFINAE + // trick to limit this constructor only to rvalue reference argument + template< + typename T, + typename = detail::are_different_t, + typename = detail::enable_if_t::value> + > + explicit variant(T&& t); + // template explicit variant(Args&&... args); + // FIXME: other constructors + + // Destructor + ~variant(); + + // Assignment + variant& operator=(const variant& rhs); + variant& operator=(variant &&rhs) noexcept; + + // SFINAE trick to avoid operator=(T&&) with T=variant<>, see comment above + template< + typename T, + typename = detail::are_different_t + > + variant& operator=(T&& t) noexcept; + + // Observers + std::size_t index() const noexcept; + // FIXME: valueless_by_exception() + + // Modifiers + // FIXME: emplace() + void swap(variant &rhs) noexcept; + + // Non-C++17x! + template static constexpr std::size_t index_of(); + }; + + // FIMXE: visit + + template + T& get(util::variant &v); + + template + const T& get(const util::variant &v); + + template + bool holds_alternative(const util::variant &v) noexcept; + + // FIXME: T&&, const TT&& versions. + + // Implementation ////////////////////////////////////////////////////////// + template + variant::variant() noexcept + { + typedef typename std::tuple_element<0, std::tuple >::type TFirst; + new (memory) TFirst(); + } + + template + variant::variant(const variant &other) + : m_index(other.m_index) + { + (cctrs()[m_index])(memory, other.memory); + } + + template + variant::variant(variant &&other) noexcept + : m_index(other.m_index) + { + (mctrs()[m_index])(memory, other.memory); + } + + template + template + variant::variant(const T& t) + : m_index(util::type_list_index::value) + { + (vctrs()[m_index])(memory, &t); + } + + template + template + variant::variant(T&& t) + : m_index(util::type_list_index::type, Ts...>::value) + { + (mctrs()[m_index])(memory, &t); + } + + template + variant::~variant() + { + (dtors()[m_index])(memory); + } + + template + variant& variant::operator=(const variant &rhs) + { + if (m_index != rhs.m_index) + { + (dtors()[ m_index])(memory); + (cctrs()[rhs.m_index])(memory, rhs.memory); + m_index = rhs.m_index; + } + else + { + (cpyrs()[rhs.m_index])(memory, rhs.memory); + } + return *this; + } + + template + variant& variant::operator=(variant &&rhs) noexcept + { + if (m_index != rhs.m_index) + { + (dtors()[ m_index])(memory); + (mctrs()[rhs.m_index])(memory, rhs.memory); + m_index = rhs.m_index; + } + else + { + (mvers()[rhs.m_index])(memory, rhs.memory); + } + return *this; + } + + template + template + variant& variant::operator=(T&& t) noexcept + { + using decayed_t = typename std::decay::type; + // FIXME: No version with implicit type conversion available! + static const constexpr std::size_t t_index = + util::type_list_index::value; + + if (t_index == m_index) + { + util::get(*this) = std::forward(t); + return *this; + } + else return (*this = variant(std::forward(t))); + } + + template + std::size_t util::variant::index() const noexcept + { + return m_index; + } + + template + void variant::swap(variant &rhs) noexcept + { + if (m_index == rhs.index()) + { + (swprs()[m_index](memory, rhs.memory)); + } + else + { + variant tmp(std::move(*this)); + *this = std::move(rhs); + rhs = std::move(tmp); + } + } + + template + template + constexpr std::size_t variant::index_of() + { + return util::type_list_index::value; // FIXME: tests! + } + + template + T& get(util::variant &v) + { + const constexpr std::size_t t_index = + util::type_list_index::value; + + if (v.index() == t_index) + return *(T*)(&v.memory); // workaround for ICC 2019 + // original code: return reinterpret_cast(v.memory); + else + throw_error(bad_variant_access()); + } + + template + const T& get(const util::variant &v) + { + const constexpr std::size_t t_index = + util::type_list_index::value; + + if (v.index() == t_index) + return *(const T*)(&v.memory); // workaround for ICC 2019 + // original code: return reinterpret_cast(v.memory); + else + throw_error(bad_variant_access()); + } + + template + bool holds_alternative(const util::variant &v) noexcept + { + return v.index() == util::variant::template index_of(); + } + + template bool operator==(const variant &lhs, + const variant &rhs) + { + using V = variant; + + // Instantiate table only here since it requires operator== for + // should have operator== only if this one is used, not in general + static const std::array eqs = { + {(&V::template equal_h::help)...} + }; + if (lhs.index() != rhs.index()) + return false; + return (eqs[lhs.index()])(lhs.memory, rhs.memory); + } + + template bool operator!=(const variant &lhs, + const variant &rhs) + { + return !(lhs == rhs); + } +} // namespace cv +} // namespace util + +#endif // OPENCV_GAPI_UTIL_VARIANT_HPP diff --git a/IPL/include/opencv/opencv2/gapi/video.hpp b/IPL/include/opencv/opencv2/gapi/video.hpp new file mode 100644 index 0000000..7602e1d --- /dev/null +++ b/IPL/include/opencv/opencv2/gapi/video.hpp @@ -0,0 +1,135 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2020 Intel Corporation + +#ifndef OPENCV_GAPI_VIDEO_HPP +#define OPENCV_GAPI_VIDEO_HPP + +#include // std::tuple + +#include + + +/** \defgroup gapi_video G-API Video processing functionality + */ + +namespace cv { namespace gapi { +namespace video +{ + using GOptFlowLKOutput = std::tuple, + cv::GArray, + cv::GArray>; + + G_TYPED_KERNEL(GCalcOptFlowLK, + ,cv::GArray,Size, + int,TermCriteria,int,double)>, + "org.opencv.video.calcOpticalFlowPyrLK") + { + static std::tuple outMeta(GMatDesc,GMatDesc,GArrayDesc, + GArrayDesc,const Size&,int, + const TermCriteria&,int,double) + { + return std::make_tuple(empty_array_desc(), empty_array_desc(), empty_array_desc()); + } + + }; + + G_TYPED_KERNEL(GCalcOptFlowLKForPyr, + ,cv::GArray, + cv::GArray,cv::GArray,Size,int, + TermCriteria,int,double)>, + "org.opencv.video.calcOpticalFlowPyrLKForPyr") + { + static std::tuple outMeta(GArrayDesc,GArrayDesc, + GArrayDesc,GArrayDesc, + const Size&,int, + const TermCriteria&,int,double) + { + return std::make_tuple(empty_array_desc(), empty_array_desc(), empty_array_desc()); + } + }; +} //namespace video + +//! @addtogroup gapi_video +//! @{ +/** @brief Calculates an optical flow for a sparse feature set using the iterative Lucas-Kanade +method with pyramids. + +See @cite Bouguet00 . + +@note Function textual ID is "org.opencv.video.calcOpticalFlowPyrLK" + +@param prevImg first 8-bit input image (GMat) or pyramid (GArray) constructed by +buildOpticalFlowPyramid. +@param nextImg second input image (GMat) or pyramid (GArray) of the same size and the same +type as prevImg. +@param prevPts GArray of 2D points for which the flow needs to be found; point coordinates must be +single-precision floating-point numbers. +@param predPts GArray of 2D points initial for the flow search; make sense only when +OPTFLOW_USE_INITIAL_FLOW flag is passed; in that case the vector must have the same size as in +the input. +@param winSize size of the search window at each pyramid level. +@param maxLevel 0-based maximal pyramid level number; if set to 0, pyramids are not used (single +level), if set to 1, two levels are used, and so on; if pyramids are passed to input then +algorithm will use as many levels as pyramids have but no more than maxLevel. +@param criteria parameter, specifying the termination criteria of the iterative search algorithm +(after the specified maximum number of iterations criteria.maxCount or when the search window +moves by less than criteria.epsilon). +@param flags operation flags: + - **OPTFLOW_USE_INITIAL_FLOW** uses initial estimations, stored in nextPts; if the flag is + not set, then prevPts is copied to nextPts and is considered the initial estimate. + - **OPTFLOW_LK_GET_MIN_EIGENVALS** use minimum eigen values as an error measure (see + minEigThreshold description); if the flag is not set, then L1 distance between patches + around the original and a moved point, divided by number of pixels in a window, is used as a + error measure. +@param minEigThresh the algorithm calculates the minimum eigen value of a 2x2 normal matrix of +optical flow equations (this matrix is called a spatial gradient matrix in @cite Bouguet00), divided +by number of pixels in a window; if this value is less than minEigThreshold, then a corresponding +feature is filtered out and its flow is not processed, so it allows to remove bad points and get a +performance boost. + +@return GArray of 2D points (with single-precision floating-point coordinates) +containing the calculated new positions of input features in the second image. +@return status GArray (of unsigned chars); each element of the vector is set to 1 if +the flow for the corresponding features has been found, otherwise, it is set to 0. +@return GArray of errors (doubles); each element of the vector is set to an error for the +corresponding feature, type of the error measure can be set in flags parameter; if the flow wasn't +found then the error is not defined (use the status parameter to find such cases). + */ +GAPI_EXPORTS std::tuple, GArray, GArray> +calcOpticalFlowPyrLK(const GMat &prevImg, + const GMat &nextImg, + const GArray &prevPts, + const GArray &predPts, + const Size &winSize = Size(21, 21), + int maxLevel = 3, + const TermCriteria &criteria = TermCriteria(TermCriteria::COUNT | + TermCriteria::EPS, + 30, 0.01), + int flags = 0, + double minEigThresh = 1e-4); + +/** +@overload +@note Function textual ID is "org.opencv.video.calcOpticalFlowPyrLKForPyr" +*/ +GAPI_EXPORTS std::tuple, GArray, GArray> +calcOpticalFlowPyrLK(const GArray &prevPyr, + const GArray &nextPyr, + const GArray &prevPts, + const GArray &predPts, + const Size &winSize = Size(21, 21), + int maxLevel = 3, + const TermCriteria &criteria = TermCriteria(TermCriteria::COUNT | + TermCriteria::EPS, + 30, 0.01), + int flags = 0, + double minEigThresh = 1e-4); + +//! @} gapi_video +} //namespace gapi +} //namespace cv + +#endif // OPENCV_GAPI_VIDEO_HPP diff --git a/IPL/include/opencv/opencv2/hfs.hpp b/IPL/include/opencv/opencv2/hfs.hpp new file mode 100644 index 0000000..948f117 --- /dev/null +++ b/IPL/include/opencv/opencv2/hfs.hpp @@ -0,0 +1,153 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "opencv2/core.hpp" + +namespace cv { namespace hfs { + +/** @defgroup hfs Hierarchical Feature Selection for Efficient Image Segmentation + +The opencv hfs module contains an efficient algorithm to segment an image. +This module is implemented based on the paper Hierarchical Feature Selection for Efficient +Image Segmentation, ECCV 2016. The original project was developed by +Yun Liu(https://github.com/yun-liu/hfs). + + +Introduction to Hierarchical Feature Selection +---------------------------------------------- + + +This algorithm is executed in 3 stages: + +In the first stage, the algorithm uses SLIC (simple linear iterative clustering) algorithm +to obtain the superpixel of the input image. + +In the second stage, the algorithm view each superpixel as a node in the graph. +It will calculate a feature vector for each edge of the graph. It then calculates a weight +for each edge based on the feature vector and trained SVM parameters. After obtaining +weight for each edge, it will exploit EGB (Efficient Graph-based Image Segmentation) +algorithm to merge some nodes in the graph thus obtaining a coarser segmentation +After these operations, a post process will be executed to merge regions that are smaller +then a specific number of pixels into their nearby region. + +In the third stage, the algorithm exploits the similar mechanism to further merge +the small regions obtained in the second stage into even coarser segmentation. + +After these three stages, we can obtain the final segmentation of the image. +For further details about the algorithm, please refer to the original paper: +Hierarchical Feature Selection for Efficient Image Segmentation, ECCV 2016 + +*/ + +//! @addtogroup hfs +//! @{ +class CV_EXPORTS_W HfsSegment : public Algorithm { +public: + +/** @brief: set and get the parameter segEgbThresholdI. +* This parameter is used in the second stage mentioned above. +* It is a constant used to threshold weights of the edge when merging +* adjacent nodes when applying EGB algorithm. The segmentation result +* tends to have more regions remained if this value is large and vice versa. +*/ +CV_WRAP virtual void setSegEgbThresholdI(float c) = 0; +CV_WRAP virtual float getSegEgbThresholdI() = 0; + + +/** @brief: set and get the parameter minRegionSizeI. +* This parameter is used in the second stage +* mentioned above. After the EGB segmentation, regions that have fewer +* pixels then this parameter will be merged into it's adjacent region. +*/ +CV_WRAP virtual void setMinRegionSizeI(int n) = 0; +CV_WRAP virtual int getMinRegionSizeI() = 0; + + +/** @brief: set and get the parameter segEgbThresholdII. +* This parameter is used in the third stage +* mentioned above. It serves the same purpose as segEgbThresholdI. +* The segmentation result tends to have more regions remained if +* this value is large and vice versa. +*/ +CV_WRAP virtual void setSegEgbThresholdII(float c) = 0; +CV_WRAP virtual float getSegEgbThresholdII() = 0; + + +/** @brief: set and get the parameter minRegionSizeII. +* This parameter is used in the third stage +* mentioned above. It serves the same purpose as minRegionSizeI +*/ +CV_WRAP virtual void setMinRegionSizeII(int n) = 0; +CV_WRAP virtual int getMinRegionSizeII() = 0; + + +/** @brief: set and get the parameter spatialWeight. +* This parameter is used in the first stage +* mentioned above(the SLIC stage). It describes how important is the role +* of position when calculating the distance between each pixel and it's +* center. The exact formula to calculate the distance is +* \f$colorDistance + spatialWeight \times spatialDistance\f$. +* The segmentation result tends to have more local consistency +* if this value is larger. +*/ +CV_WRAP virtual void setSpatialWeight(float w) = 0; +CV_WRAP virtual float getSpatialWeight() = 0; + + +/** @brief: set and get the parameter slicSpixelSize. +* This parameter is used in the first stage mentioned +* above(the SLIC stage). It describes the size of each +* superpixel when initializing SLIC. Every superpixel +* approximately has \f$slicSpixelSize \times slicSpixelSize\f$ +* pixels in the beginning. +*/ +CV_WRAP virtual void setSlicSpixelSize(int n) = 0; +CV_WRAP virtual int getSlicSpixelSize() = 0; + + +/** @brief: set and get the parameter numSlicIter. +* This parameter is used in the first stage. It +* describes how many iteration to perform when executing SLIC. +*/ +CV_WRAP virtual void setNumSlicIter(int n) = 0; +CV_WRAP virtual int getNumSlicIter() = 0; + +/** @brief do segmentation gpu +* @param src: the input image +* @param ifDraw: if draw the image in the returned Mat. if this parameter is false, +* then the content of the returned Mat is a matrix of index, describing the region +* each pixel belongs to. And it's data type is CV_16U. If this parameter is true, +* then the returned Mat is a segmented picture, and color of each region is the +* average color of all pixels in that region. And it's data type is the same as +* the input image +*/ +CV_WRAP virtual Mat performSegmentGpu(InputArray src, bool ifDraw = true) = 0; + +/** @brief do segmentation with cpu +* This method is only implemented for reference. +* It is highly NOT recommanded to use it. +*/ +CV_WRAP virtual Mat performSegmentCpu(InputArray src, bool ifDraw = true) = 0; + +/** @brief: create a hfs object +* @param height: the height of the input image +* @param width: the width of the input image +* @param segEgbThresholdI: parameter segEgbThresholdI +* @param minRegionSizeI: parameter minRegionSizeI +* @param segEgbThresholdII: parameter segEgbThresholdII +* @param minRegionSizeII: parameter minRegionSizeII +* @param spatialWeight: parameter spatialWeight +* @param slicSpixelSize: parameter slicSpixelSize +* @param numSlicIter: parameter numSlicIter +*/ +CV_WRAP static Ptr create(int height, int width, + float segEgbThresholdI = 0.08f, int minRegionSizeI = 100, + float segEgbThresholdII = 0.28f, int minRegionSizeII = 200, + float spatialWeight = 0.6f, int slicSpixelSize = 8, int numSlicIter = 5); + +}; + +//! @} + +}} // namespace cv { namespace hfs { diff --git a/IPL/include/opencv/opencv2/highgui.hpp b/IPL/include/opencv/opencv2/highgui.hpp index 41bd8af..f109640 100644 --- a/IPL/include/opencv/opencv2/highgui.hpp +++ b/IPL/include/opencv/opencv2/highgui.hpp @@ -40,12 +40,16 @@ // //M*/ -#ifndef __OPENCV_HIGHGUI_HPP__ -#define __OPENCV_HIGHGUI_HPP__ +#ifndef OPENCV_HIGHGUI_HPP +#define OPENCV_HIGHGUI_HPP #include "opencv2/core.hpp" +#ifdef HAVE_OPENCV_IMGCODECS #include "opencv2/imgcodecs.hpp" +#endif +#ifdef HAVE_OPENCV_VIDEOIO #include "opencv2/videoio.hpp" +#endif /** @defgroup highgui High-level GUI @@ -182,15 +186,19 @@ enum WindowFlags { WINDOW_FULLSCREEN = 1, //!< change the window to fullscreen. WINDOW_FREERATIO = 0x00000100, //!< the image expends as much as it can (no ratio constraint). - WINDOW_KEEPRATIO = 0x00000000 //!< the ratio of the image is respected. - }; + WINDOW_KEEPRATIO = 0x00000000, //!< the ratio of the image is respected. + WINDOW_GUI_EXPANDED=0x00000000, //!< status bar and tool bar + WINDOW_GUI_NORMAL = 0x00000010, //!< old fashious way + }; //! Flags for cv::setWindowProperty / cv::getWindowProperty enum WindowPropertyFlags { WND_PROP_FULLSCREEN = 0, //!< fullscreen property (can be WINDOW_NORMAL or WINDOW_FULLSCREEN). WND_PROP_AUTOSIZE = 1, //!< autosize property (can be WINDOW_NORMAL or WINDOW_AUTOSIZE). WND_PROP_ASPECT_RATIO = 2, //!< window's aspect ration (can be set to WINDOW_FREERATIO or WINDOW_KEEPRATIO). - WND_PROP_OPENGL = 3 //!< opengl support. + WND_PROP_OPENGL = 3, //!< opengl support. + WND_PROP_VISIBLE = 4, //!< checks whether the window exists and is visible + WND_PROP_TOPMOST = 5 //!< property to toggle normal window being topmost or not }; //! Mouse Events see cv::MouseCallback @@ -237,9 +245,10 @@ enum QtFontStyles { //! Qt "button" type enum QtButtonTypes { - QT_PUSH_BUTTON = 0, //!< Push button. - QT_CHECKBOX = 1, //!< Checkbox button. - QT_RADIOBOX = 2 //!< Radiobox button. + QT_PUSH_BUTTON = 0, //!< Push button. + QT_CHECKBOX = 1, //!< Checkbox button. + QT_RADIOBOX = 2, //!< Radiobox button. + QT_NEW_BUTTONBAR = 1024 //!< Button should create a new buttonbar }; /** @brief Callback function for mouse events. see cv::setMouseCallback @@ -287,9 +296,9 @@ Qt backend supports additional flags: displayed image (see imshow ), and you cannot change the window size manually. - **WINDOW_FREERATIO or WINDOW_KEEPRATIO:** WINDOW_FREERATIO adjusts the image with no respect to its ratio, whereas WINDOW_KEEPRATIO keeps the image ratio. - - **CV_GUI_NORMAL or CV_GUI_EXPANDED:** CV_GUI_NORMAL is the old way to draw the window - without statusbar and toolbar, whereas CV_GUI_EXPANDED is a new enhanced GUI. -By default, flags == WINDOW_AUTOSIZE | WINDOW_KEEPRATIO | CV_GUI_EXPANDED + - **WINDOW_GUI_NORMAL or WINDOW_GUI_EXPANDED:** WINDOW_GUI_NORMAL is the old way to draw the window + without statusbar and toolbar, whereas WINDOW_GUI_EXPANDED is a new enhanced GUI. +By default, flags == WINDOW_AUTOSIZE | WINDOW_KEEPRATIO | WINDOW_GUI_EXPANDED @param winname Name of the window in the window caption that may be used as a window identifier. @param flags Flags of the window. The supported flags are: (cv::WindowFlags) @@ -312,6 +321,15 @@ CV_EXPORTS_W void destroyAllWindows(); CV_EXPORTS_W int startWindowThread(); +/** @brief Similar to #waitKey, but returns full key code. + +@note + +Key code is implementation specific and depends on used backend: QT/GTK/Win32/etc + +*/ +CV_EXPORTS_W int waitKeyEx(int delay = 0); + /** @brief Waits for a pressed key. The function waitKey waits for a key event infinitely (when \f$\texttt{delay}\leq 0\f$ ) or for delay @@ -344,7 +362,7 @@ Otherwise, the image is scaled to fit the window. The function may scale the ima - If the image is 8-bit unsigned, it is displayed as is. - If the image is 16-bit unsigned or 32-bit integer, the pixels are divided by 256. That is, the value range [0,255\*256] is mapped to [0,255]. -- If the image is 32-bit floating-point, the pixel values are multiplied by 255. That is, the +- If the image is 32-bit or 64-bit floating-point, the pixel values are multiplied by 255. That is, the value range [0,1] is mapped to [0,255]. If window was created with OpenGL support, cv::imshow also support ogl::Buffer , ogl::Texture2D and @@ -384,6 +402,12 @@ CV_EXPORTS_W void imshow(const String& winname, InputArray mat); */ CV_EXPORTS_W void resizeWindow(const String& winname, int width, int height); +/** @overload +@param winname Window name. +@param size The new window size. +*/ +CV_EXPORTS_W void resizeWindow(const String& winname, const cv::Size& size); + /** @brief Moves window to the specified position @param winname Name of the window. @@ -419,12 +443,23 @@ The function getWindowProperty returns properties of a window. */ CV_EXPORTS_W double getWindowProperty(const String& winname, int prop_id); +/** @brief Provides rectangle of image in the window. + +The function getWindowImageRect returns the client screen coordinates, width and height of the image rendering area. + +@param winname Name of the window. + +@sa resizeWindow moveWindow + */ +CV_EXPORTS_W Rect getWindowImageRect(const String& winname); + +/** @example samples/cpp/create_mask.cpp +This program demonstrates using mouse events and how to make and use a mask image (black and white) . +*/ /** @brief Sets mouse handler for the specified window @param winname Name of the window. -@param onMouse Mouse callback. See OpenCV samples, such as -, on how to specify and -use the callback. +@param onMouse Callback function for mouse events. See OpenCV samples on how to specify and use the callback. @param userdata The optional parameter passed to the callback. */ CV_EXPORTS void setMouseCallback(const String& winname, MouseCallback onMouse, void* userdata = 0); @@ -451,6 +486,44 @@ Mouse-wheel events are currently supported only on Windows. */ CV_EXPORTS int getMouseWheelDelta(int flags); +/** @brief Selects ROI on the given image. +Function creates a window and allows user to select a ROI using mouse. +Controls: use `space` or `enter` to finish selection, use key `c` to cancel selection (function will return the zero cv::Rect). + +@param windowName name of the window where selection process will be shown. +@param img image to select a ROI. +@param showCrosshair if true crosshair of selection rectangle will be shown. +@param fromCenter if true center of selection will match initial mouse position. In opposite case a corner of +selection rectangle will correspont to the initial mouse position. +@return selected ROI or empty rect if selection canceled. + +@note The function sets it's own mouse callback for specified window using cv::setMouseCallback(windowName, ...). +After finish of work an empty callback will be set for the used window. + */ +CV_EXPORTS_W Rect selectROI(const String& windowName, InputArray img, bool showCrosshair = true, bool fromCenter = false); + +/** @overload + */ +CV_EXPORTS_W Rect selectROI(InputArray img, bool showCrosshair = true, bool fromCenter = false); + +/** @brief Selects ROIs on the given image. +Function creates a window and allows user to select a ROIs using mouse. +Controls: use `space` or `enter` to finish current selection and start a new one, +use `esc` to terminate multiple ROI selection process. + +@param windowName name of the window where selection process will be shown. +@param img image to select a ROI. +@param boundingBoxes selected ROIs. +@param showCrosshair if true crosshair of selection rectangle will be shown. +@param fromCenter if true center of selection will match initial mouse position. In opposite case a corner of +selection rectangle will correspont to the initial mouse position. + +@note The function sets it's own mouse callback for specified window using cv::setMouseCallback(windowName, ...). +After finish of work an empty callback will be set for the used window. + */ +CV_EXPORTS_W void selectROIs(const String& windowName, InputArray img, + CV_OUT std::vector& boundingBoxes, bool showCrosshair = true, bool fromCenter = false); + /** @brief Creates a trackbar and attaches it to the specified window. The function createTrackbar creates a trackbar (a slider or range control) with the specified name @@ -460,7 +533,7 @@ displayed in the specified window winname. @note -[__Qt Backend Only__] winname can be empty (or NULL) if the trackbar should be attached to the +[__Qt Backend Only__] winname can be empty if the trackbar should be attached to the control panel. Clicking the label of each trackbar enables editing the trackbar values manually. @@ -488,7 +561,7 @@ The function returns the current position of the specified trackbar. @note -[__Qt Backend Only__] winname can be empty (or NULL) if the trackbar is attached to the control +[__Qt Backend Only__] winname can be empty if the trackbar is attached to the control panel. @param trackbarname Name of the trackbar. @@ -502,7 +575,7 @@ The function sets the position of the specified trackbar in the specified window @note -[__Qt Backend Only__] winname can be empty (or NULL) if the trackbar is attached to the control +[__Qt Backend Only__] winname can be empty if the trackbar is attached to the control panel. @param trackbarname Name of the trackbar. @@ -517,7 +590,7 @@ The function sets the maximum position of the specified trackbar in the specifie @note -[__Qt Backend Only__] winname can be empty (or NULL) if the trackbar is attached to the control +[__Qt Backend Only__] winname can be empty if the trackbar is attached to the control panel. @param trackbarname Name of the trackbar. @@ -532,12 +605,12 @@ The function sets the minimum position of the specified trackbar in the specifie @note -[__Qt Backend Only__] winname can be empty (or NULL) if the trackbar is attached to the control +[__Qt Backend Only__] winname can be empty if the trackbar is attached to the control panel. @param trackbarname Name of the trackbar. @param winname Name of the window that is the parent of trackbar. -@param minval New maximum position. +@param minval New minimum position. */ CV_EXPORTS_W void setTrackbarMin(const String& trackbarname, const String& winname, int minval); @@ -663,6 +736,23 @@ The function addText draws *text* on the image *img* using a specific font *font */ CV_EXPORTS void addText( const Mat& img, const String& text, Point org, const QtFont& font); +/** @brief Draws a text on the image. + +@param img 8-bit 3-channel image where the text should be drawn. +@param text Text to write on an image. +@param org Point(x,y) where the text should start on an image. +@param nameFont Name of the font. The name should match the name of a system font (such as +*Times*). If the font is not found, a default one is used. +@param pointSize Size of the font. If not specified, equal zero or negative, the point size of the +font is set to a system-dependent default value. Generally, this is 12 points. +@param color Color of the font in BGRA where A = 255 is fully transparent. +@param weight Font weight. Available operation flags are : cv::QtFontWeights You can also specify a positive integer for better control. +@param style Font style. Available operation flags are : cv::QtFontStyles +@param spacing Spacing between characters. It can be negative or positive. + */ +CV_EXPORTS_W void addText(const Mat& img, const String& text, Point org, const String& nameFont, int pointSize = -1, Scalar color = Scalar::all(0), + int weight = QT_FONT_NORMAL, int style = QT_STYLE_NORMAL, int spacing = 0); + /** @brief Displays a text on a window image as an overlay for a specified duration. The function displayOverlay displays useful information/tips on top of the window for a certain @@ -675,7 +765,7 @@ after the specified delay the original content of the window is restored. function is called before the previous overlay text timed out, the timer is restarted and the text is updated. If this value is zero, the text never disappears. */ -CV_EXPORTS void displayOverlay(const String& winname, const String& text, int delayms = 0); +CV_EXPORTS_W void displayOverlay(const String& winname, const String& text, int delayms = 0); /** @brief Displays a text on the window statusbar during the specified period of time. @@ -689,7 +779,7 @@ created with the CV_GUI_EXPANDED flags). the previous text timed out, the timer is restarted and the text is updated. If this value is zero, the text never disappears. */ -CV_EXPORTS void displayStatusBar(const String& winname, const String& text, int delayms = 0); +CV_EXPORTS_W void displayStatusBar(const String& winname, const String& text, int delayms = 0); /** @brief Saves parameters of the specified window. @@ -717,15 +807,17 @@ CV_EXPORTS void stopLoop(); The function createButton attaches a button to the control panel. Each button is added to a buttonbar to the right of the last button. A new buttonbar is created if nothing was attached to the -control panel before, or if the last element attached to the control panel was a trackbar. +control panel before, or if the last element attached to the control panel was a trackbar or if the +QT_NEW_BUTTONBAR flag is added to the type. See below various examples of the cv::createButton function call: : @code - createButton(NULL,callbackButton);//create a push button "button 0", that will call callbackButton. + createButton("",callbackButton);//create a push button "button 0", that will call callbackButton. createButton("button2",callbackButton,NULL,QT_CHECKBOX,0); createButton("button3",callbackButton,&value); createButton("button5",callbackButton1,NULL,QT_RADIOBOX); createButton("button6",callbackButton2,NULL,QT_PUSH_BUTTON,1); + createButton("button6",callbackButton2,NULL,QT_PUSH_BUTTON|QT_NEW_BUTTONBAR);// create a push button in a new row @endcode @param bar_name Name of the button. @@ -747,8 +839,4 @@ CV_EXPORTS int createButton( const String& bar_name, ButtonCallback on_change, } // cv -#ifndef DISABLE_OPENCV_24_COMPATIBILITY -#include "opencv2/highgui/highgui_c.h" -#endif - #endif diff --git a/IPL/include/opencv/opencv2/highgui/highgui_c.h b/IPL/include/opencv/opencv2/highgui/highgui_c.h index 47fdb84..5d20b95 100644 --- a/IPL/include/opencv/opencv2/highgui/highgui_c.h +++ b/IPL/include/opencv/opencv2/highgui/highgui_c.h @@ -39,13 +39,11 @@ // //M*/ -#ifndef __OPENCV_HIGHGUI_H__ -#define __OPENCV_HIGHGUI_H__ +#ifndef OPENCV_HIGHGUI_H +#define OPENCV_HIGHGUI_H #include "opencv2/core/core_c.h" #include "opencv2/imgproc/imgproc_c.h" -#include "opencv2/imgcodecs/imgcodecs_c.h" -#include "opencv2/videoio/videoio_c.h" #ifdef __cplusplus extern "C" { @@ -107,6 +105,7 @@ enum CV_WND_PROP_AUTOSIZE = 1, //to change/get window's autosize property CV_WND_PROP_ASPECTRATIO= 2, //to change/get window's aspectratio property CV_WND_PROP_OPENGL = 3, //to change/get window's opengl support + CV_WND_PROP_VISIBLE = 4, //These 2 flags are used by cvNamedWindow and cvSet/GetWindowProperty CV_WINDOW_NORMAL = 0x00000000, //the user can resize the window (no constraint) / also use to switch a fullscreen window to a normal size @@ -130,6 +129,11 @@ CVAPI(int) cvNamedWindow( const char* name, int flags CV_DEFAULT(CV_WINDOW_AUTOS CVAPI(void) cvSetWindowProperty(const char* name, int prop_id, double prop_value); CVAPI(double) cvGetWindowProperty(const char* name, int prop_id); +#ifdef __cplusplus // FIXIT remove in OpenCV 4.0 +/* Get window image rectangle coordinates, width and height */ +CVAPI(cv::Rect)cvGetWindowImageRect(const char* name); +#endif + /* display image within window (highgui windows remember their content) */ CVAPI(void) cvShowImage( const char* name, const CvArr* image ); @@ -234,7 +238,7 @@ CVAPI(void) cvUpdateWindow(const char* window_name); #define set_preprocess_func cvSetPreprocessFuncWin32 #define set_postprocess_func cvSetPostprocessFuncWin32 -#if defined WIN32 || defined _WIN32 +#if defined _WIN32 CVAPI(void) cvSetPreprocessFuncWin32_(const void* callback); CVAPI(void) cvSetPostprocessFuncWin32_(const void* callback); diff --git a/IPL/include/opencv/opencv2/img_hash.hpp b/IPL/include/opencv/opencv2/img_hash.hpp new file mode 100644 index 0000000..5e7a928 --- /dev/null +++ b/IPL/include/opencv/opencv2/img_hash.hpp @@ -0,0 +1,78 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_IMG_HASH_H +#define OPENCV_IMG_HASH_H + +#include "opencv2/img_hash/average_hash.hpp" +#include "opencv2/img_hash/block_mean_hash.hpp" +#include "opencv2/img_hash/color_moment_hash.hpp" +#include "opencv2/img_hash/marr_hildreth_hash.hpp" +#include "opencv2/img_hash/phash.hpp" +#include "opencv2/img_hash/radial_variance_hash.hpp" + +/** +@defgroup img_hash The module brings implementations of different image hashing algorithms. + +Provide algorithms to extract the hash of images and fast way to figure out most similar images in +huge data set. + +Namespace for all functions is cv::img_hash. + +### Supported Algorithms + +- Average hash (also called Different hash) +- PHash (also called Perceptual hash) +- Marr Hildreth Hash +- Radial Variance Hash +- Block Mean Hash (modes 0 and 1) +- Color Moment Hash (this is the one and only hash algorithm resist to rotation attack(-90~90 degree)) + +You can study more about image hashing from following paper and websites: + +- "Implementation and benchmarking of perceptual image hash functions" @cite zauner2010implementation +- "Looks Like It" @cite lookslikeit + +### Code Example + +@include samples/hash_samples.cpp + +### Performance under different attacks + +![Performance chart](img_hash/doc/attack_performance.JPG) + +### Speed comparison with PHash library (100 images from ukbench) + +![Hash Computation chart](img_hash/doc/hash_computation_chart.JPG) +![Hash comparison chart](img_hash/doc/hash_comparison_chart.JPG) + +As you can see, hash computation speed of img_hash module outperform [PHash library](http://www.phash.org/) a lot. + +PS : I do not list out the comparison of Average hash, PHash and Color Moment hash, because I cannot +find them in PHash. + +### Motivation + +Collects useful image hash algorithms into opencv, so we do not need to rewrite them by ourselves +again and again or rely on another 3rd party library(ex : PHash library). BOVW or correlation +matching are good and robust, but they are very slow compare with image hash, if you need to deal +with large scale CBIR(content based image retrieval) problem, image hash is a more reasonable +solution. + +### More info + +You can learn more about img_hash modules from following links, these links show you how to find +similar image from ukbench dataset, provide thorough benchmark of different attacks(contrast, blur, +noise(gaussion,pepper and salt), jpeg compression, watermark, resize). + +* [Introduction to image hash module of opencv](http://qtandopencv.blogspot.my/2016/06/introduction-to-image-hash-module-of.html) +* [Speed up image hashing of opencv(img_hash) and introduce color moment hash](http://qtandopencv.blogspot.my/2016/06/speed-up-image-hashing-of-opencvimghash.html) + +### Contributors + +Tham Ngap Wei, thamngapwei@gmail.com + +*/ + +#endif // OPENCV_IMG_HASH_H diff --git a/IPL/include/opencv/opencv2/img_hash/average_hash.hpp b/IPL/include/opencv/opencv2/img_hash/average_hash.hpp new file mode 100644 index 0000000..1204441 --- /dev/null +++ b/IPL/include/opencv/opencv2/img_hash/average_hash.hpp @@ -0,0 +1,39 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_AVERAGE_HASH_HPP +#define OPENCV_AVERAGE_HASH_HPP + +#include "img_hash_base.hpp" + +namespace cv { +namespace img_hash { + +//! @addtogroup img_hash +//! @{ + +/** @brief Computes average hash value of the input image + +This is a fast image hashing algorithm, but only work on simple case. For more details, please +refer to @cite lookslikeit +*/ +class CV_EXPORTS_W AverageHash : public ImgHashBase +{ +public: + CV_WRAP static Ptr create(); +protected: + AverageHash() {} +}; + +/** @brief Calculates img_hash::AverageHash in one call +@param inputArr input image want to compute hash value, type should be CV_8UC4, CV_8UC3 or CV_8UC1. +@param outputArr Hash value of input, it will contain 16 hex decimal number, return type is CV_8U +*/ +CV_EXPORTS_W void averageHash(cv::InputArray inputArr, cv::OutputArray outputArr); + +//! @} + +}} // cv::img_hash:: + +#endif // OPENCV_AVERAGE_HASH_HPP diff --git a/IPL/include/opencv/opencv2/img_hash/block_mean_hash.hpp b/IPL/include/opencv/opencv2/img_hash/block_mean_hash.hpp new file mode 100644 index 0000000..dbf2a5f --- /dev/null +++ b/IPL/include/opencv/opencv2/img_hash/block_mean_hash.hpp @@ -0,0 +1,52 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_BLOCK_MEAN_HASH_HPP +#define OPENCV_BLOCK_MEAN_HASH_HPP + +#include "img_hash_base.hpp" + +namespace cv { +namespace img_hash { + +//! @addtogroup img_hash +//! @{ + +enum BlockMeanHashMode +{ + BLOCK_MEAN_HASH_MODE_0 = 0, //!< use fewer block and generate 16*16/8 uchar hash value + BLOCK_MEAN_HASH_MODE_1 = 1, //!< use block blocks(step sizes/2), generate 31*31/8 + 1 uchar hash value +}; + +/** @brief Image hash based on block mean. + +See @cite zauner2010implementation for details. +*/ +class CV_EXPORTS_W BlockMeanHash : public ImgHashBase +{ +public: + /** @brief Create BlockMeanHash object + @param mode the mode + */ + CV_WRAP void setMode(int mode); + CV_WRAP std::vector getMean() const; + CV_WRAP static Ptr create(int mode = BLOCK_MEAN_HASH_MODE_0); +protected: + BlockMeanHash() {} +}; + +/** @brief Computes block mean hash of the input image + @param inputArr input image want to compute hash value, type should be CV_8UC4, CV_8UC3 or CV_8UC1. + @param outputArr Hash value of input, it will contain 16 hex decimal number, return type is CV_8U + @param mode the mode +*/ +CV_EXPORTS_W void blockMeanHash(cv::InputArray inputArr, + cv::OutputArray outputArr, + int mode = BLOCK_MEAN_HASH_MODE_0); + +//! @} + +}} // cv::img_hash:: + +#endif // OPENCV_BLOCK_MEAN_HASH_HPP diff --git a/IPL/include/opencv/opencv2/img_hash/color_moment_hash.hpp b/IPL/include/opencv/opencv2/img_hash/color_moment_hash.hpp new file mode 100644 index 0000000..d0a820b --- /dev/null +++ b/IPL/include/opencv/opencv2/img_hash/color_moment_hash.hpp @@ -0,0 +1,41 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_COLOR_MOMENT_HASH_HPP +#define OPENCV_COLOR_MOMENT_HASH_HPP + +#include "img_hash_base.hpp" + +namespace cv { +namespace img_hash { + +//! @addtogroup img_hash +//! @{ + +/** @brief Image hash based on color moments. + +See @cite tang2012perceptual for details. +*/ +class CV_EXPORTS_W ColorMomentHash : public ImgHashBase +{ +public: + CV_WRAP static Ptr create(); +protected: + ColorMomentHash() {} +}; + +/** @brief Computes color moment hash of the input, the algorithm + is come from the paper "Perceptual Hashing for Color Images + Using Invariant Moments" + @param inputArr input image want to compute hash value, + type should be CV_8UC4, CV_8UC3 or CV_8UC1. + @param outputArr 42 hash values with type CV_64F(double) + */ +CV_EXPORTS_W void colorMomentHash(cv::InputArray inputArr, cv::OutputArray outputArr); + +//! @} + +}} // cv::img_hash:: + +#endif // OPENCV_COLOR_MOMENT_HASH_HPP diff --git a/IPL/include/opencv/opencv2/img_hash/img_hash_base.hpp b/IPL/include/opencv/opencv2/img_hash/img_hash_base.hpp new file mode 100644 index 0000000..f0cc451 --- /dev/null +++ b/IPL/include/opencv/opencv2/img_hash/img_hash_base.hpp @@ -0,0 +1,46 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_IMG_HASH_BASE_HPP +#define OPENCV_IMG_HASH_BASE_HPP + +#include "opencv2/core.hpp" + +namespace cv { +namespace img_hash { + +//! @addtogroup img_hash +//! @{ + +/** @brief The base class for image hash algorithms + */ +class CV_EXPORTS_W ImgHashBase : public Algorithm +{ +public: + class ImgHashImpl; + + ~ImgHashBase(); + /** @brief Computes hash of the input image + @param inputArr input image want to compute hash value + @param outputArr hash of the image + */ + CV_WRAP void compute(cv::InputArray inputArr, cv::OutputArray outputArr); + /** @brief Compare the hash value between inOne and inTwo + @param hashOne Hash value one + @param hashTwo Hash value two + @return value indicate similarity between inOne and inTwo, the meaning + of the value vary from algorithms to algorithms + */ + CV_WRAP double compare(cv::InputArray hashOne, cv::InputArray hashTwo) const; +protected: + ImgHashBase(); +protected: + Ptr pImpl; +}; + +//! @} + +} } // cv::img_hash:: + +#endif // OPENCV_IMG_HASH_BASE_HPP diff --git a/IPL/include/opencv/opencv2/img_hash/marr_hildreth_hash.hpp b/IPL/include/opencv/opencv2/img_hash/marr_hildreth_hash.hpp new file mode 100644 index 0000000..a9b04f9 --- /dev/null +++ b/IPL/include/opencv/opencv2/img_hash/marr_hildreth_hash.hpp @@ -0,0 +1,64 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_MARR_HILDRETH_HASH_HPP +#define OPENCV_MARR_HILDRETH_HASH_HPP + +#include "img_hash_base.hpp" + +namespace cv { +namespace img_hash { + +//! @addtogroup img_hash +//! @{ + +/** @brief Marr-Hildreth Operator Based Hash, slowest but more discriminative. + +See @cite zauner2010implementation for details. +*/ +class CV_EXPORTS_W MarrHildrethHash : public ImgHashBase +{ +public: + /** + * @brief self explain + */ + CV_WRAP float getAlpha() const; + + /** + * @brief self explain + */ + CV_WRAP float getScale() const; + + /** @brief Set Mh kernel parameters + @param alpha int scale factor for marr wavelet (default=2). + @param scale int level of scale factor (default = 1) + */ + CV_WRAP void setKernelParam(float alpha, float scale); + + /** + @param alpha int scale factor for marr wavelet (default=2). + @param scale int level of scale factor (default = 1) + */ + CV_WRAP static Ptr create(float alpha = 2.0f, float scale = 1.0f); +protected: + MarrHildrethHash() {} +}; + +/** @brief Computes average hash value of the input image + @param inputArr input image want to compute hash value, + type should be CV_8UC4, CV_8UC3, CV_8UC1. + @param outputArr Hash value of input, it will contain 16 hex + decimal number, return type is CV_8U + @param alpha int scale factor for marr wavelet (default=2). + @param scale int level of scale factor (default = 1) +*/ +CV_EXPORTS_W void marrHildrethHash(cv::InputArray inputArr, + cv::OutputArray outputArr, + float alpha = 2.0f, float scale = 1.0f); + +//! @} + +}} // cv::img_hash:: + +#endif // OPENCV_MARR_HILDRETH_HASH_HPP diff --git a/IPL/include/opencv/opencv2/img_hash/phash.hpp b/IPL/include/opencv/opencv2/img_hash/phash.hpp new file mode 100644 index 0000000..d57cd6f --- /dev/null +++ b/IPL/include/opencv/opencv2/img_hash/phash.hpp @@ -0,0 +1,41 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_PHASH_HPP +#define OPENCV_PHASH_HPP + +#include "img_hash_base.hpp" + +namespace cv { +namespace img_hash { + +//! @addtogroup img_hash +//! @{ + +/** @brief pHash + +Slower than average_hash, but tolerant of minor modifications + +This algorithm can combat more variation than averageHash, for more details please refer to @cite lookslikeit +*/ +class CV_EXPORTS_W PHash : public ImgHashBase +{ +public: + CV_WRAP static Ptr create(); +protected: + PHash() {} +}; + +/** @brief Computes pHash value of the input image + @param inputArr input image want to compute hash value, + type should be CV_8UC4, CV_8UC3, CV_8UC1. + @param outputArr Hash value of input, it will contain 8 uchar value +*/ +CV_EXPORTS_W void pHash(cv::InputArray inputArr, cv::OutputArray outputArr); + +//! @} + +} } // cv::img_hash:: + +#endif // OPENCV_PHASH_HPP diff --git a/IPL/include/opencv/opencv2/img_hash/radial_variance_hash.hpp b/IPL/include/opencv/opencv2/img_hash/radial_variance_hash.hpp new file mode 100644 index 0000000..455f285 --- /dev/null +++ b/IPL/include/opencv/opencv2/img_hash/radial_variance_hash.hpp @@ -0,0 +1,58 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_RADIAL_VARIANCE_HASH_HPP +#define OPENCV_RADIAL_VARIANCE_HASH_HPP + +#include "img_hash_base.hpp" + +namespace cv { +namespace img_hash { + +//! @addtogroup img_hash +//! @{ + + +/** @brief Image hash based on Radon transform. + +See @cite tang2012perceptual for details. +*/ +class CV_EXPORTS_W RadialVarianceHash : public ImgHashBase +{ +public: + CV_WRAP static Ptr create(double sigma = 1, int numOfAngleLine = 180); + + CV_WRAP int getNumOfAngleLine() const; + CV_WRAP double getSigma() const; + + CV_WRAP void setNumOfAngleLine(int value); + CV_WRAP void setSigma(double value); + + // internals + std::vector getFeatures(); + cv::Mat getHash(); + Mat getPixPerLine(Mat const &input); + Mat getProjection(); +protected: + RadialVarianceHash() {} +}; + +/** @brief Computes radial variance hash of the input image + @param inputArr input image want to compute hash value, + type should be CV_8UC4, CV_8UC3, CV_8UC1. + @param outputArr Hash value of input + @param sigma Gaussian kernel standard deviation + @param numOfAngleLine The number of angles to consider + */ +CV_EXPORTS_W void radialVarianceHash(cv::InputArray inputArr, + cv::OutputArray outputArr, + double sigma = 1, + int numOfAngleLine = 180); + + +//! @} + +}} // cv::img_hash:: + +#endif // OPENCV_RADIAL_VARIANCE_HASH_HPP diff --git a/IPL/include/opencv/opencv2/imgcodecs.hpp b/IPL/include/opencv/opencv2/imgcodecs.hpp index ac0fd24..89b014d 100644 --- a/IPL/include/opencv/opencv2/imgcodecs.hpp +++ b/IPL/include/opencv/opencv2/imgcodecs.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_IMGCODECS_HPP__ -#define __OPENCV_IMGCODECS_HPP__ +#ifndef OPENCV_IMGCODECS_HPP +#define OPENCV_IMGCODECS_HPP #include "opencv2/core.hpp" @@ -62,8 +62,8 @@ namespace cv //! Imread flags enum ImreadModes { - IMREAD_UNCHANGED = -1, //!< If set, return the loaded image as is (with alpha channel, otherwise it gets cropped). - IMREAD_GRAYSCALE = 0, //!< If set, always convert image to the single channel grayscale image. + IMREAD_UNCHANGED = -1, //!< If set, return the loaded image as is (with alpha channel, otherwise it gets cropped). Ignore EXIF orientation. + IMREAD_GRAYSCALE = 0, //!< If set, always convert image to the single channel grayscale image (codec internal conversion). IMREAD_COLOR = 1, //!< If set, always convert image to the 3 channel BGR color image. IMREAD_ANYDEPTH = 2, //!< If set, return 16-bit/32-bit image when the input has the corresponding depth, otherwise convert it to 8-bit. IMREAD_ANYCOLOR = 4, //!< If set, the image is read in any possible color format. @@ -73,7 +73,8 @@ enum ImreadModes { IMREAD_REDUCED_GRAYSCALE_4 = 32, //!< If set, always convert image to the single channel grayscale image and the image size reduced 1/4. IMREAD_REDUCED_COLOR_4 = 33, //!< If set, always convert image to the 3 channel BGR color image and the image size reduced 1/4. IMREAD_REDUCED_GRAYSCALE_8 = 64, //!< If set, always convert image to the single channel grayscale image and the image size reduced 1/8. - IMREAD_REDUCED_COLOR_8 = 65 //!< If set, always convert image to the 3 channel BGR color image and the image size reduced 1/8. + IMREAD_REDUCED_COLOR_8 = 65, //!< If set, always convert image to the 3 channel BGR color image and the image size reduced 1/8. + IMREAD_IGNORE_ORIENTATION = 128 //!< If set, do not rotate the image according to EXIF's orientation flag. }; //! Imwrite flags @@ -84,11 +85,24 @@ enum ImwriteFlags { IMWRITE_JPEG_RST_INTERVAL = 4, //!< JPEG restart interval, 0 - 65535, default is 0 - no restart. IMWRITE_JPEG_LUMA_QUALITY = 5, //!< Separate luma quality level, 0 - 100, default is 0 - don't use. IMWRITE_JPEG_CHROMA_QUALITY = 6, //!< Separate chroma quality level, 0 - 100, default is 0 - don't use. - IMWRITE_PNG_COMPRESSION = 16, //!< For PNG, it can be the compression level from 0 to 9. A higher value means a smaller size and longer compression time. Default value is 3. - IMWRITE_PNG_STRATEGY = 17, //!< One of cv::ImwritePNGFlags, default is IMWRITE_PNG_STRATEGY_DEFAULT. + IMWRITE_PNG_COMPRESSION = 16, //!< For PNG, it can be the compression level from 0 to 9. A higher value means a smaller size and longer compression time. If specified, strategy is changed to IMWRITE_PNG_STRATEGY_DEFAULT (Z_DEFAULT_STRATEGY). Default value is 1 (best speed setting). + IMWRITE_PNG_STRATEGY = 17, //!< One of cv::ImwritePNGFlags, default is IMWRITE_PNG_STRATEGY_RLE. IMWRITE_PNG_BILEVEL = 18, //!< Binary level PNG, 0 or 1, default is 0. IMWRITE_PXM_BINARY = 32, //!< For PPM, PGM, or PBM, it can be a binary format flag, 0 or 1. Default value is 1. - IMWRITE_WEBP_QUALITY = 64 //!< For WEBP, it can be a quality from 1 to 100 (the higher is the better). By default (without any parameter) and for quality above 100 the lossless compression is used. + IMWRITE_EXR_TYPE = (3 << 4) + 0, /* 48 */ //!< override EXR storage type (FLOAT (FP32) is default) + IMWRITE_WEBP_QUALITY = 64, //!< For WEBP, it can be a quality from 1 to 100 (the higher is the better). By default (without any parameter) and for quality above 100 the lossless compression is used. + IMWRITE_PAM_TUPLETYPE = 128,//!< For PAM, sets the TUPLETYPE field to the corresponding string value that is defined for the format + IMWRITE_TIFF_RESUNIT = 256,//!< For TIFF, use to specify which DPI resolution unit to set; see libtiff documentation for valid values + IMWRITE_TIFF_XDPI = 257,//!< For TIFF, use to specify the X direction DPI + IMWRITE_TIFF_YDPI = 258, //!< For TIFF, use to specify the Y direction DPI + IMWRITE_TIFF_COMPRESSION = 259, //!< For TIFF, use to specify the image compression scheme. See libtiff for integer constants corresponding to compression formats. Note, for images whose depth is CV_32F, only libtiff's SGILOG compression scheme is used. For other supported depths, the compression scheme can be specified by this flag; LZW compression is the default. + IMWRITE_JPEG2000_COMPRESSION_X1000 = 272 //!< For JPEG2000, use to specify the target compression rate (multiplied by 1000). The value can be from 0 to 1000. Default is 1000. + }; + +enum ImwriteEXRTypeFlags { + /*IMWRITE_EXR_TYPE_UNIT = 0, //!< not supported */ + IMWRITE_EXR_TYPE_HALF = 1, //!< store as HALF (FP16) + IMWRITE_EXR_TYPE_FLOAT = 2 //!< store as FP32 (default) }; //! Imwrite PNG specific flags used to tune the compression algorithm. @@ -107,6 +121,16 @@ enum ImwritePNGFlags { IMWRITE_PNG_STRATEGY_FIXED = 4 //!< Using this value prevents the use of dynamic Huffman codes, allowing for a simpler decoder for special applications. }; +//! Imwrite PAM specific tupletype flags used to define the 'TUPETYPE' field of a PAM file. +enum ImwritePAMFlags { + IMWRITE_PAM_FORMAT_NULL = 0, + IMWRITE_PAM_FORMAT_BLACKANDWHITE = 1, + IMWRITE_PAM_FORMAT_GRAYSCALE = 2, + IMWRITE_PAM_FORMAT_GRAYSCALE_ALPHA = 3, + IMWRITE_PAM_FORMAT_RGB = 4, + IMWRITE_PAM_FORMAT_RGB_ALPHA = 5, + }; + /** @brief Loads an image from a file. @anchor imread @@ -118,21 +142,23 @@ returns an empty matrix ( Mat::data==NULL ). Currently, the following file formats are supported: - Windows bitmaps - \*.bmp, \*.dib (always supported) -- JPEG files - \*.jpeg, \*.jpg, \*.jpe (see the *Notes* section) -- JPEG 2000 files - \*.jp2 (see the *Notes* section) -- Portable Network Graphics - \*.png (see the *Notes* section) -- WebP - \*.webp (see the *Notes* section) +- JPEG files - \*.jpeg, \*.jpg, \*.jpe (see the *Note* section) +- JPEG 2000 files - \*.jp2 (see the *Note* section) +- Portable Network Graphics - \*.png (see the *Note* section) +- WebP - \*.webp (see the *Note* section) - Portable image format - \*.pbm, \*.pgm, \*.ppm \*.pxm, \*.pnm (always supported) +- PFM files - \*.pfm (see the *Note* section) - Sun rasters - \*.sr, \*.ras (always supported) -- TIFF files - \*.tiff, \*.tif (see the *Notes* section) -- OpenEXR Image files - \*.exr (see the *Notes* section) +- TIFF files - \*.tiff, \*.tif (see the *Note* section) +- OpenEXR Image files - \*.exr (see the *Note* section) - Radiance HDR - \*.hdr, \*.pic (always supported) -- Raster and Vector geospatial data supported by Gdal (see the *Notes* section) +- Raster and Vector geospatial data supported by GDAL (see the *Note* section) @note - - The function determines the type of an image by the content, not by the file extension. - In the case of color images, the decoded images will have the channels stored in **B G R** order. +- When using IMREAD_GRAYSCALE, the codec's internal grayscale conversion will be used, if available. + Results may differ to the output of cvtColor() - On Microsoft Windows\* OS and MacOSX\*, the codecs shipped with an OpenCV image (libjpeg, libpng, libtiff, and libjasper) are used by default. So, OpenCV can always read JPEGs, PNGs, and TIFFs. On MacOSX, there is also an option to use native MacOSX image readers. But beware @@ -143,9 +169,16 @@ Currently, the following file formats are supported: files, for example, "libjpeg-dev", in Debian\* and Ubuntu\*) to get the codec support or turn on the OPENCV_BUILD_3RDPARTY_LIBS flag in CMake. - In the case you set *WITH_GDAL* flag to true in CMake and @ref IMREAD_LOAD_GDAL to load the image, - then [GDAL](http://www.gdal.org) driver will be used in order to decode the image by supporting + then the [GDAL](http://www.gdal.org) driver will be used in order to decode the image, supporting the following formats: [Raster](http://www.gdal.org/formats_list.html), [Vector](http://www.gdal.org/ogr_formats.html). +- If EXIF information is embedded in the image file, the EXIF orientation will be taken into account + and thus the image will be rotated accordingly except if the flags @ref IMREAD_IGNORE_ORIENTATION + or @ref IMREAD_UNCHANGED are passed. +- Use the IMREAD_UNCHANGED flag to keep the floating point values from PFM image. +- By default number of pixels must be less than 2^30. Limit can be set using system + variable OPENCV_IO_MAX_IMAGE_PIXELS + @param filename Name of file to be loaded. @param flags Flag that can take values of cv::ImreadModes */ @@ -159,65 +192,30 @@ The function imreadmulti loads a multi-page image from the specified file into a @param mats A vector of Mat objects holding each page, if more than one. @sa cv::imread */ -CV_EXPORTS_W bool imreadmulti(const String& filename, std::vector& mats, int flags = IMREAD_ANYCOLOR); +CV_EXPORTS_W bool imreadmulti(const String& filename, CV_OUT std::vector& mats, int flags = IMREAD_ANYCOLOR); /** @brief Saves an image to a specified file. The function imwrite saves the image to the specified file. The image format is chosen based on the -filename extension (see cv::imread for the list of extensions). Only 8-bit (or 16-bit unsigned (CV_16U) -in case of PNG, JPEG 2000, and TIFF) single-channel or 3-channel (with 'BGR' channel order) images -can be saved using this function. If the format, depth or channel order is different, use -Mat::convertTo , and cv::cvtColor to convert it before saving. Or, use the universal FileStorage I/O +filename extension (see cv::imread for the list of extensions). In general, only 8-bit +single-channel or 3-channel (with 'BGR' channel order) images +can be saved using this function, with these exceptions: + +- 16-bit unsigned (CV_16U) images can be saved in the case of PNG, JPEG 2000, and TIFF formats +- 32-bit float (CV_32F) images can be saved in PFM, TIFF, OpenEXR, and Radiance HDR formats; + 3-channel (CV_32FC3) TIFF images will be saved using the LogLuv high dynamic range encoding + (4 bytes per pixel) +- PNG images with an alpha channel can be saved using this function. To do this, create +8-bit (or 16-bit) 4-channel image BGRA, where the alpha channel goes last. Fully transparent pixels +should have alpha set to 0, fully opaque pixels should have alpha set to 255/65535 (see the code sample below). + +If the format, depth or channel order is different, use +Mat::convertTo and cv::cvtColor to convert it before saving. Or, use the universal FileStorage I/O functions to save the image to XML or YAML format. -It is possible to store PNG images with an alpha channel using this function. To do this, create -8-bit (or 16-bit) 4-channel image BGRA, where the alpha channel goes last. Fully transparent pixels -should have alpha set to 0, fully opaque pixels should have alpha set to 255/65535. - -The sample below shows how to create such a BGRA image and store to PNG file. It also demonstrates how to set custom -compression parameters : -@code - #include - - using namespace cv; - using namespace std; - - void createAlphaMat(Mat &mat) - { - CV_Assert(mat.channels() == 4); - for (int i = 0; i < mat.rows; ++i) { - for (int j = 0; j < mat.cols; ++j) { - Vec4b& bgra = mat.at(i, j); - bgra[0] = UCHAR_MAX; // Blue - bgra[1] = saturate_cast((float (mat.cols - j)) / ((float)mat.cols) * UCHAR_MAX); // Green - bgra[2] = saturate_cast((float (mat.rows - i)) / ((float)mat.rows) * UCHAR_MAX); // Red - bgra[3] = saturate_cast(0.5 * (bgra[1] + bgra[2])); // Alpha - } - } - } - - int main(int argv, char **argc) - { - // Create mat with alpha channel - Mat mat(480, 640, CV_8UC4); - createAlphaMat(mat); - - vector compression_params; - compression_params.push_back(IMWRITE_PNG_COMPRESSION); - compression_params.push_back(9); - - try { - imwrite("alpha.png", mat, compression_params); - } - catch (cv::Exception& ex) { - fprintf(stderr, "Exception converting image to PNG format: %s\n", ex.what()); - return 1; - } - - fprintf(stdout, "Saved PNG file with alpha data.\n"); - return 0; - } -@endcode +The sample below shows how to create a BGRA image and save it to a PNG file. It also demonstrates how to set custom +compression parameters: +@include snippets/imgcodecs_imwrite.cpp @param filename Name of the file. @param img Image to be saved. @param params Format-specific parameters encoded as pairs (paramId_1, paramValue_1, paramId_2, paramValue_2, ... .) see cv::ImwriteFlags @@ -260,8 +258,21 @@ CV_EXPORTS_W bool imencode( const String& ext, InputArray img, CV_OUT std::vector& buf, const std::vector& params = std::vector()); +/** @brief Returns true if the specified image can be decoded by OpenCV + +@param filename File name of the image +*/ +CV_EXPORTS_W bool haveImageReader( const String& filename ); + +/** @brief Returns true if an image with the specified filename can be encoded by OpenCV + + @param filename File name of the image + */ +CV_EXPORTS_W bool haveImageWriter( const String& filename ); + + //! @} imgcodecs } // cv -#endif //__OPENCV_IMGCODECS_HPP__ +#endif //OPENCV_IMGCODECS_HPP diff --git a/IPL/include/opencv/opencv2/imgcodecs/imgcodecs_c.h b/IPL/include/opencv/opencv2/imgcodecs/imgcodecs_c.h index ad793cc..c78b3f7 100644 --- a/IPL/include/opencv/opencv2/imgcodecs/imgcodecs_c.h +++ b/IPL/include/opencv/opencv2/imgcodecs/imgcodecs_c.h @@ -1,137 +1 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// Intel License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of Intel Corporation may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_IMGCODECS_H__ -#define __OPENCV_IMGCODECS_H__ - -#include "opencv2/core/core_c.h" - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -/** @addtogroup imgcodecs_c - @{ - */ - -enum -{ -/* 8bit, color or not */ - CV_LOAD_IMAGE_UNCHANGED =-1, -/* 8bit, gray */ - CV_LOAD_IMAGE_GRAYSCALE =0, -/* ?, color */ - CV_LOAD_IMAGE_COLOR =1, -/* any depth, ? */ - CV_LOAD_IMAGE_ANYDEPTH =2, -/* ?, any color */ - CV_LOAD_IMAGE_ANYCOLOR =4 -}; - -/* load image from file - iscolor can be a combination of above flags where CV_LOAD_IMAGE_UNCHANGED - overrides the other flags - using CV_LOAD_IMAGE_ANYCOLOR alone is equivalent to CV_LOAD_IMAGE_UNCHANGED - unless CV_LOAD_IMAGE_ANYDEPTH is specified images are converted to 8bit -*/ -CVAPI(IplImage*) cvLoadImage( const char* filename, int iscolor CV_DEFAULT(CV_LOAD_IMAGE_COLOR)); -CVAPI(CvMat*) cvLoadImageM( const char* filename, int iscolor CV_DEFAULT(CV_LOAD_IMAGE_COLOR)); - -enum -{ - CV_IMWRITE_JPEG_QUALITY =1, - CV_IMWRITE_JPEG_PROGRESSIVE =2, - CV_IMWRITE_JPEG_OPTIMIZE =3, - CV_IMWRITE_JPEG_RST_INTERVAL =4, - CV_IMWRITE_JPEG_LUMA_QUALITY =5, - CV_IMWRITE_JPEG_CHROMA_QUALITY =6, - CV_IMWRITE_PNG_COMPRESSION =16, - CV_IMWRITE_PNG_STRATEGY =17, - CV_IMWRITE_PNG_BILEVEL =18, - CV_IMWRITE_PNG_STRATEGY_DEFAULT =0, - CV_IMWRITE_PNG_STRATEGY_FILTERED =1, - CV_IMWRITE_PNG_STRATEGY_HUFFMAN_ONLY =2, - CV_IMWRITE_PNG_STRATEGY_RLE =3, - CV_IMWRITE_PNG_STRATEGY_FIXED =4, - CV_IMWRITE_PXM_BINARY =32, - CV_IMWRITE_WEBP_QUALITY =64 -}; - -/* save image to file */ -CVAPI(int) cvSaveImage( const char* filename, const CvArr* image, - const int* params CV_DEFAULT(0) ); - -/* decode image stored in the buffer */ -CVAPI(IplImage*) cvDecodeImage( const CvMat* buf, int iscolor CV_DEFAULT(CV_LOAD_IMAGE_COLOR)); -CVAPI(CvMat*) cvDecodeImageM( const CvMat* buf, int iscolor CV_DEFAULT(CV_LOAD_IMAGE_COLOR)); - -/* encode image and store the result as a byte vector (single-row 8uC1 matrix) */ -CVAPI(CvMat*) cvEncodeImage( const char* ext, const CvArr* image, - const int* params CV_DEFAULT(0) ); - -enum -{ - CV_CVTIMG_FLIP =1, - CV_CVTIMG_SWAP_RB =2 -}; - -/* utility function: convert one image to another with optional vertical flip */ -CVAPI(void) cvConvertImage( const CvArr* src, CvArr* dst, int flags CV_DEFAULT(0)); - -CVAPI(int) cvHaveImageReader(const char* filename); -CVAPI(int) cvHaveImageWriter(const char* filename); - - -/****************************************************************************************\ -* Obsolete functions/synonyms * -\****************************************************************************************/ - -#define cvvLoadImage(name) cvLoadImage((name),1) -#define cvvSaveImage cvSaveImage -#define cvvConvertImage cvConvertImage - -/** @} imgcodecs_c */ - -#ifdef __cplusplus -} -#endif - -#endif // __OPENCV_IMGCODECS_H__ +#error "This header with legacy C API declarations has been removed from OpenCV. Legacy constants are available from legacy/constants_c.h file." diff --git a/IPL/include/opencv/opencv2/imgcodecs/ios.h b/IPL/include/opencv/opencv2/imgcodecs/ios.h index fbd6371..a90c6d3 100644 --- a/IPL/include/opencv/opencv2/imgcodecs/ios.h +++ b/IPL/include/opencv/opencv2/imgcodecs/ios.h @@ -50,8 +50,8 @@ //! @addtogroup imgcodecs_ios //! @{ -UIImage* MatToUIImage(const cv::Mat& image); -void UIImageToMat(const UIImage* image, - cv::Mat& m, bool alphaExist = false); +CV_EXPORTS UIImage* MatToUIImage(const cv::Mat& image); +CV_EXPORTS void UIImageToMat(const UIImage* image, + cv::Mat& m, bool alphaExist = false); //! @} diff --git a/IPL/include/opencv/opencv2/imgcodecs/legacy/constants_c.h b/IPL/include/opencv/opencv2/imgcodecs/legacy/constants_c.h new file mode 100644 index 0000000..de7be4f --- /dev/null +++ b/IPL/include/opencv/opencv2/imgcodecs/legacy/constants_c.h @@ -0,0 +1,54 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_IMGCODECS_LEGACY_CONSTANTS_H +#define OPENCV_IMGCODECS_LEGACY_CONSTANTS_H + +/* duplicate of "ImreadModes" enumeration for better compatibility with OpenCV 3.x */ +enum +{ +/* 8bit, color or not */ + CV_LOAD_IMAGE_UNCHANGED =-1, +/* 8bit, gray */ + CV_LOAD_IMAGE_GRAYSCALE =0, +/* ?, color */ + CV_LOAD_IMAGE_COLOR =1, +/* any depth, ? */ + CV_LOAD_IMAGE_ANYDEPTH =2, +/* ?, any color */ + CV_LOAD_IMAGE_ANYCOLOR =4, +/* ?, no rotate */ + CV_LOAD_IMAGE_IGNORE_ORIENTATION =128 +}; + +/* duplicate of "ImwriteFlags" enumeration for better compatibility with OpenCV 3.x */ +enum +{ + CV_IMWRITE_JPEG_QUALITY =1, + CV_IMWRITE_JPEG_PROGRESSIVE =2, + CV_IMWRITE_JPEG_OPTIMIZE =3, + CV_IMWRITE_JPEG_RST_INTERVAL =4, + CV_IMWRITE_JPEG_LUMA_QUALITY =5, + CV_IMWRITE_JPEG_CHROMA_QUALITY =6, + CV_IMWRITE_PNG_COMPRESSION =16, + CV_IMWRITE_PNG_STRATEGY =17, + CV_IMWRITE_PNG_BILEVEL =18, + CV_IMWRITE_PNG_STRATEGY_DEFAULT =0, + CV_IMWRITE_PNG_STRATEGY_FILTERED =1, + CV_IMWRITE_PNG_STRATEGY_HUFFMAN_ONLY =2, + CV_IMWRITE_PNG_STRATEGY_RLE =3, + CV_IMWRITE_PNG_STRATEGY_FIXED =4, + CV_IMWRITE_PXM_BINARY =32, + CV_IMWRITE_EXR_TYPE = 48, + CV_IMWRITE_WEBP_QUALITY =64, + CV_IMWRITE_PAM_TUPLETYPE = 128, + CV_IMWRITE_PAM_FORMAT_NULL = 0, + CV_IMWRITE_PAM_FORMAT_BLACKANDWHITE = 1, + CV_IMWRITE_PAM_FORMAT_GRAYSCALE = 2, + CV_IMWRITE_PAM_FORMAT_GRAYSCALE_ALPHA = 3, + CV_IMWRITE_PAM_FORMAT_RGB = 4, + CV_IMWRITE_PAM_FORMAT_RGB_ALPHA = 5, +}; + +#endif // OPENCV_IMGCODECS_LEGACY_CONSTANTS_H diff --git a/IPL/include/opencv/opencv2/imgproc.hpp b/IPL/include/opencv/opencv2/imgproc.hpp index 1f330f2..e7ccf28 100644 --- a/IPL/include/opencv/opencv2/imgproc.hpp +++ b/IPL/include/opencv/opencv2/imgproc.hpp @@ -40,13 +40,16 @@ // //M*/ -#ifndef __OPENCV_IMGPROC_HPP__ -#define __OPENCV_IMGPROC_HPP__ +#ifndef OPENCV_IMGPROC_HPP +#define OPENCV_IMGPROC_HPP #include "opencv2/core.hpp" /** - @defgroup imgproc Image processing + @defgroup imgproc Image Processing + +This module includes image-processing functions. + @{ @defgroup imgproc_filter Image Filtering @@ -67,7 +70,7 @@ processing the left-most pixels in each row, you need pixels to the left of them of the image. You can let these pixels be the same as the left-most image pixels ("replicated border" extrapolation method), or assume that all the non-existing pixels are zeros ("constant border" extrapolation method), and so on. OpenCV enables you to specify the extrapolation method. -For details, see cv::BorderTypes +For details, see #BorderTypes @anchor filter_depths ### Depth combinations @@ -102,7 +105,7 @@ the simplest and the fastest resize, need to solve two main problems with the ab previous section, for some \f$(x,y)\f$, either one of \f$f_x(x,y)\f$, or \f$f_y(x,y)\f$, or both of them may fall outside of the image. In this case, an extrapolation method needs to be used. OpenCV provides the same selection of extrapolation methods as in the filtering functions. In -addition, it provides the method BORDER_TRANSPARENT. This means that the corresponding pixels in +addition, it provides the method #BORDER_TRANSPARENT. This means that the corresponding pixels in the destination image will not be modified at all. - Interpolation of pixel values. Usually \f$f_x(x,y)\f$ and \f$f_y(x,y)\f$ are floating-point @@ -117,6 +120,8 @@ f_y(x,y))\f$, and then the value of the polynomial at \f$(f_x(x,y), f_y(x,y))\f$ interpolated pixel value. In OpenCV, you can choose between several interpolation methods. See resize for details. +@note The geometrical transformations do not work with `CV_8S` or `CV_32S` images. + @defgroup imgproc_misc Miscellaneous Image Transformations @defgroup imgproc_draw Drawing Functions @@ -146,6 +151,7 @@ case, the color[3] is simply copied to the repainted pixels. Thus, if you want t semi-transparent shapes, you can paint them in a separate buffer and then blend it with the main image. + @defgroup imgproc_color_conversions Color Space Conversions @defgroup imgproc_colormap ColorMaps in OpenCV The human perception isn't built for observing fine changes in grayscale images. Human eyes are more @@ -157,49 +163,16 @@ In OpenCV you only need applyColorMap to apply a colormap on a given image. The code reads the path to an image from command line, applies a Jet colormap on it and shows the result: -@code -#include -#include -#include -#include -using namespace cv; - -#include -using namespace std; - -int main(int argc, const char *argv[]) -{ - // We need an input image. (can be grayscale or color) - if (argc < 2) - { - cerr << "We need an image to process here. Please run: colorMap [path_to_image]" << endl; - return -1; - } - Mat img_in = imread(argv[1]); - if(img_in.empty()) - { - cerr << "Sample image (" << argv[1] << ") is empty. Please adjust your path, so it points to a valid input image!" << endl; - return -1; - } - // Holds the colormap version of the image: - Mat img_color; - // Apply the colormap: - applyColorMap(img_in, img_color, COLORMAP_JET); - // Show the result: - imshow("colorMap", img_color); - waitKey(0); - return 0; -} -@endcode +@include snippets/imgproc_applyColorMap.cpp -@see cv::ColormapTypes +@see #ColormapTypes @defgroup imgproc_subdiv2d Planar Subdivision The Subdiv2D class described in this section is used to perform various planar subdivision on a set of 2D points (represented as vector of Point2f). OpenCV subdivides a plane into triangles -using the Delaunay’s algorithm, which corresponds to the dual graph of the Voronoi diagram. -In the figure below, the Delaunay’s triangulation is marked with black lines and the Voronoi +using the Delaunay's algorithm, which corresponds to the dual graph of the Voronoi diagram. +In the figure below, the Delaunay's triangulation is marked with black lines and the Voronoi diagram with red lines. ![Delaunay triangulation (black) and Voronoi (red)](pics/delaunay_voronoi.png) @@ -213,6 +186,11 @@ location of points on the plane, building special graphs (such as NNG,RNG), and @defgroup imgproc_feature Feature Detection @defgroup imgproc_object Object Detection @defgroup imgproc_c C API + @defgroup imgproc_hal Hardware Acceleration Layer + @{ + @defgroup imgproc_hal_functions Functions + @defgroup imgproc_hal_interface Interface + @} @} */ @@ -226,10 +204,14 @@ namespace cv //! @addtogroup imgproc_filter //! @{ +enum SpecialFilter { + FILTER_SCHARR = -1 +}; + //! type of morphological operation enum MorphTypes{ - MORPH_ERODE = 0, //!< see cv::erode - MORPH_DILATE = 1, //!< see cv::dilate + MORPH_ERODE = 0, //!< see #erode + MORPH_DILATE = 1, //!< see #dilate MORPH_OPEN = 2, //!< an opening operation //!< \f[\texttt{dst} = \mathrm{open} ( \texttt{src} , \texttt{element} )= \mathrm{dilate} ( \mathrm{erode} ( \texttt{src} , \texttt{element} ))\f] MORPH_CLOSE = 3, //!< a closing operation @@ -240,8 +222,8 @@ enum MorphTypes{ //!< \f[\texttt{dst} = \mathrm{tophat} ( \texttt{src} , \texttt{element} )= \texttt{src} - \mathrm{open} ( \texttt{src} , \texttt{element} )\f] MORPH_BLACKHAT = 6, //!< "black hat" //!< \f[\texttt{dst} = \mathrm{blackhat} ( \texttt{src} , \texttt{element} )= \mathrm{close} ( \texttt{src} , \texttt{element} )- \texttt{src}\f] - MORPH_HITMISS = 7 //!< "hit and miss" - //!< .- Only supported for CV_8UC1 binary images. Tutorial can be found in [this page](http://opencv-code.com/tutorials/hit-or-miss-transform-in-opencv/) + MORPH_HITMISS = 7 //!< "hit or miss" + //!< .- Only supported for CV_8UC1 binary images. A tutorial can be found in the documentation }; //! shape of the structuring element @@ -272,6 +254,8 @@ enum InterpolationFlags{ INTER_AREA = 3, /** Lanczos interpolation over 8x8 neighborhood */ INTER_LANCZOS4 = 4, + /** Bit exact bilinear interpolation */ + INTER_LINEAR_EXACT = 5, /** mask for interpolation codes */ INTER_MAX = 7, /** flag, fills all of the destination image pixels. If some of them correspond to outliers in the @@ -279,13 +263,22 @@ enum InterpolationFlags{ WARP_FILL_OUTLIERS = 8, /** flag, inverse transformation - For example, polar transforms: - - flag is __not__ set: \f$dst( \phi , \rho ) = src(x,y)\f$ - - flag is set: \f$dst(x,y) = src( \phi , \rho )\f$ + For example, #linearPolar or #logPolar transforms: + - flag is __not__ set: \f$dst( \rho , \phi ) = src(x,y)\f$ + - flag is set: \f$dst(x,y) = src( \rho , \phi )\f$ */ WARP_INVERSE_MAP = 16 }; +/** \brief Specify the polar mapping mode +@sa warpPolar +*/ +enum WarpPolarMode +{ + WARP_POLAR_LINEAR = 0, ///< Remaps an image to/from polar space. + WARP_POLAR_LOG = 256 ///< Remaps an image to/from semilog-polar space. +}; + enum InterpolationMasks { INTER_BITS = 5, INTER_BITS2 = INTER_BITS * 2, @@ -299,7 +292,7 @@ enum InterpolationMasks { //! @{ //! Distance types for Distance Transform and M-estimators -//! @see cv::distanceTransform, cv::fitLine +//! @see distanceTransform, fitLine enum DistanceTypes { DIST_USER = -1, //!< User defined distance DIST_L1 = 1, //!< distance = |x1-x2| + |y1-y2| @@ -332,7 +325,7 @@ enum ThresholdTypes { }; //! adaptive threshold algorithm -//! see cv::adaptiveThreshold +//! @see adaptiveThreshold enum AdaptiveThresholdTypes { /** the threshold value \f$T(x,y)\f$ is a mean of the \f$\texttt{blockSize} \times \texttt{blockSize}\f$ neighborhood of \f$(x, y)\f$ minus C */ @@ -340,16 +333,10 @@ enum AdaptiveThresholdTypes { /** the threshold value \f$T(x, y)\f$ is a weighted sum (cross-correlation with a Gaussian window) of the \f$\texttt{blockSize} \times \texttt{blockSize}\f$ neighborhood of \f$(x, y)\f$ minus C . The default sigma (standard deviation) is used for the specified blockSize . See - cv::getGaussianKernel*/ + #getGaussianKernel*/ ADAPTIVE_THRESH_GAUSSIAN_C = 1 }; -//! cv::undistort mode -enum UndistortTypes { - PROJ_SPHERICAL_ORTHO = 0, - PROJ_SPHERICAL_EQRECT = 1 - }; - //! class of the pixel in GrabCut algorithm enum GrabCutClasses { GC_BGD = 0, //!< an obvious background pixels @@ -368,7 +355,9 @@ enum GrabCutModes { automatically initialized with GC_BGD .*/ GC_INIT_WITH_MASK = 1, /** The value means that the algorithm should just resume. */ - GC_EVAL = 2 + GC_EVAL = 2, + /** The value means that the algorithm should just run the grabCut algorithm (a single iteration) with the fixed model */ + GC_EVAL_FREEZE_MODEL = 3 }; //! distanceTransform algorithm flags @@ -405,7 +394,16 @@ enum ConnectedComponentsTypes { CC_STAT_WIDTH = 2, //!< The horizontal size of the bounding box CC_STAT_HEIGHT = 3, //!< The vertical size of the bounding box CC_STAT_AREA = 4, //!< The total area (in pixels) of the connected component - CC_STAT_MAX = 5 +#ifndef CV_DOXYGEN + CC_STAT_MAX = 5 //!< Max enumeration value. Used internally only for memory allocation +#endif +}; + +//! connected components algorithm +enum ConnectedComponentsAlgorithmsTypes { + CCL_WU = 0, //!< SAUF algorithm for 8-way connectivity, SAUF algorithm for 4-way connectivity + CCL_DEFAULT = -1, //!< BBDT algorithm for 8-way connectivity, SAUF algorithm for 4-way connectivity + CCL_GRANA = 1 //!< BBDT algorithm for 8-way connectivity, SAUF algorithm for 4-way connectivity }; //! mode of the contour retrieval algorithm @@ -440,8 +438,25 @@ enum ContourApproximationModes { CHAIN_APPROX_TC89_KCOS = 4 }; +/** @brief Shape matching methods + +\f$A\f$ denotes object1,\f$B\f$ denotes object2 + +\f$\begin{array}{l} m^A_i = \mathrm{sign} (h^A_i) \cdot \log{h^A_i} \\ m^B_i = \mathrm{sign} (h^B_i) \cdot \log{h^B_i} \end{array}\f$ + +and \f$h^A_i, h^B_i\f$ are the Hu moments of \f$A\f$ and \f$B\f$ , respectively. +*/ +enum ShapeMatchModes { + CONTOURS_MATCH_I1 =1, //!< \f[I_1(A,B) = \sum _{i=1...7} \left | \frac{1}{m^A_i} - \frac{1}{m^B_i} \right |\f] + CONTOURS_MATCH_I2 =2, //!< \f[I_2(A,B) = \sum _{i=1...7} \left | m^A_i - m^B_i \right |\f] + CONTOURS_MATCH_I3 =3 //!< \f[I_3(A,B) = \max _{i=1...7} \frac{ \left| m^A_i - m^B_i \right| }{ \left| m^A_i \right| }\f] +}; + //! @} imgproc_shape +//! @addtogroup imgproc_feature +//! @{ + //! Variants of a Hough transform enum HoughModes { @@ -458,11 +473,11 @@ enum HoughModes { /** multi-scale variant of the classical Hough transform. The lines are encoded the same way as HOUGH_STANDARD. */ HOUGH_MULTI_SCALE = 2, - HOUGH_GRADIENT = 3 //!< basically *21HT*, described in @cite Yuen90 + HOUGH_GRADIENT = 3, //!< basically *21HT*, described in @cite Yuen90 + HOUGH_GRADIENT_ALT = 4, //!< variation of HOUGH_GRADIENT to get better accuracy }; //! Variants of Line Segment %Detector -//! @ingroup imgproc_feature enum LineSegmentDetectorModes { LSD_REFINE_NONE = 0, //!< No refinement applied LSD_REFINE_STD = 1, //!< Standard refinement is applied. E.g. breaking arches into smaller straighter line approximations. @@ -470,6 +485,8 @@ enum LineSegmentDetectorModes { //!< refined through increase of precision, decrement in size, etc. }; +//! @} imgproc_feature + /** Histogram comparison methods @ingroup imgproc_hist */ @@ -500,9 +517,9 @@ enum HistCompMethods { HISTCMP_KL_DIV = 5 }; -/** the color conversion code +/** the color conversion codes @see @ref imgproc_color_conversions -@ingroup imgproc_misc +@ingroup imgproc_color_conversions */ enum ColorConversionCodes { COLOR_BGR2BGRA = 0, //!< add alpha channel to RGB or BGR image @@ -587,7 +604,7 @@ enum ColorConversionCodes { COLOR_HLS2BGR = 60, COLOR_HLS2RGB = 61, - COLOR_BGR2HSV_FULL = 66, //!< + COLOR_BGR2HSV_FULL = 66, COLOR_RGB2HSV_FULL = 67, COLOR_BGR2HLS_FULL = 68, COLOR_RGB2HLS_FULL = 69, @@ -757,135 +774,215 @@ enum ColorConversionCodes { COLOR_BayerRG2RGB_EA = COLOR_BayerBG2BGR_EA, COLOR_BayerGR2RGB_EA = COLOR_BayerGB2BGR_EA, + //! Demosaicing with alpha channel + COLOR_BayerBG2BGRA = 139, + COLOR_BayerGB2BGRA = 140, + COLOR_BayerRG2BGRA = 141, + COLOR_BayerGR2BGRA = 142, - COLOR_COLORCVT_MAX = 139 + COLOR_BayerBG2RGBA = COLOR_BayerRG2BGRA, + COLOR_BayerGB2RGBA = COLOR_BayerGR2BGRA, + COLOR_BayerRG2RGBA = COLOR_BayerBG2BGRA, + COLOR_BayerGR2RGBA = COLOR_BayerGB2BGRA, + + COLOR_COLORCVT_MAX = 143 }; -/** types of intersection between rectangles -@ingroup imgproc_shape -*/ +//! @addtogroup imgproc_shape +//! @{ + +//! types of intersection between rectangles enum RectanglesIntersectTypes { INTERSECT_NONE = 0, //!< No intersection INTERSECT_PARTIAL = 1, //!< There is a partial intersection INTERSECT_FULL = 2 //!< One of the rectangle is fully enclosed in the other }; -//! finds arbitrary template in the grayscale image using Generalized Hough Transform -class CV_EXPORTS GeneralizedHough : public Algorithm +/** types of line +@ingroup imgproc_draw +*/ +enum LineTypes { + FILLED = -1, + LINE_4 = 4, //!< 4-connected line + LINE_8 = 8, //!< 8-connected line + LINE_AA = 16 //!< antialiased line +}; + +/** Only a subset of Hershey fonts are supported +@ingroup imgproc_draw +*/ +enum HersheyFonts { + FONT_HERSHEY_SIMPLEX = 0, //!< normal size sans-serif font + FONT_HERSHEY_PLAIN = 1, //!< small size sans-serif font + FONT_HERSHEY_DUPLEX = 2, //!< normal size sans-serif font (more complex than FONT_HERSHEY_SIMPLEX) + FONT_HERSHEY_COMPLEX = 3, //!< normal size serif font + FONT_HERSHEY_TRIPLEX = 4, //!< normal size serif font (more complex than FONT_HERSHEY_COMPLEX) + FONT_HERSHEY_COMPLEX_SMALL = 5, //!< smaller version of FONT_HERSHEY_COMPLEX + FONT_HERSHEY_SCRIPT_SIMPLEX = 6, //!< hand-writing style font + FONT_HERSHEY_SCRIPT_COMPLEX = 7, //!< more complex variant of FONT_HERSHEY_SCRIPT_SIMPLEX + FONT_ITALIC = 16 //!< flag for italic font +}; + +/** Possible set of marker types used for the cv::drawMarker function +@ingroup imgproc_draw +*/ +enum MarkerTypes +{ + MARKER_CROSS = 0, //!< A crosshair marker shape + MARKER_TILTED_CROSS = 1, //!< A 45 degree tilted crosshair marker shape + MARKER_STAR = 2, //!< A star marker shape, combination of cross and tilted cross + MARKER_DIAMOND = 3, //!< A diamond marker shape + MARKER_SQUARE = 4, //!< A square marker shape + MARKER_TRIANGLE_UP = 5, //!< An upwards pointing triangle marker shape + MARKER_TRIANGLE_DOWN = 6 //!< A downwards pointing triangle marker shape +}; + +/** @brief finds arbitrary template in the grayscale image using Generalized Hough Transform +*/ +class CV_EXPORTS_W GeneralizedHough : public Algorithm { public: //! set template to search - virtual void setTemplate(InputArray templ, Point templCenter = Point(-1, -1)) = 0; - virtual void setTemplate(InputArray edges, InputArray dx, InputArray dy, Point templCenter = Point(-1, -1)) = 0; + CV_WRAP virtual void setTemplate(InputArray templ, Point templCenter = Point(-1, -1)) = 0; + CV_WRAP virtual void setTemplate(InputArray edges, InputArray dx, InputArray dy, Point templCenter = Point(-1, -1)) = 0; //! find template on image - virtual void detect(InputArray image, OutputArray positions, OutputArray votes = noArray()) = 0; - virtual void detect(InputArray edges, InputArray dx, InputArray dy, OutputArray positions, OutputArray votes = noArray()) = 0; + CV_WRAP virtual void detect(InputArray image, OutputArray positions, OutputArray votes = noArray()) = 0; + CV_WRAP virtual void detect(InputArray edges, InputArray dx, InputArray dy, OutputArray positions, OutputArray votes = noArray()) = 0; //! Canny low threshold. - virtual void setCannyLowThresh(int cannyLowThresh) = 0; - virtual int getCannyLowThresh() const = 0; + CV_WRAP virtual void setCannyLowThresh(int cannyLowThresh) = 0; + CV_WRAP virtual int getCannyLowThresh() const = 0; //! Canny high threshold. - virtual void setCannyHighThresh(int cannyHighThresh) = 0; - virtual int getCannyHighThresh() const = 0; + CV_WRAP virtual void setCannyHighThresh(int cannyHighThresh) = 0; + CV_WRAP virtual int getCannyHighThresh() const = 0; //! Minimum distance between the centers of the detected objects. - virtual void setMinDist(double minDist) = 0; - virtual double getMinDist() const = 0; + CV_WRAP virtual void setMinDist(double minDist) = 0; + CV_WRAP virtual double getMinDist() const = 0; //! Inverse ratio of the accumulator resolution to the image resolution. - virtual void setDp(double dp) = 0; - virtual double getDp() const = 0; + CV_WRAP virtual void setDp(double dp) = 0; + CV_WRAP virtual double getDp() const = 0; //! Maximal size of inner buffers. - virtual void setMaxBufferSize(int maxBufferSize) = 0; - virtual int getMaxBufferSize() const = 0; + CV_WRAP virtual void setMaxBufferSize(int maxBufferSize) = 0; + CV_WRAP virtual int getMaxBufferSize() const = 0; }; -//! Ballard, D.H. (1981). Generalizing the Hough transform to detect arbitrary shapes. Pattern Recognition 13 (2): 111-122. -//! Detects position only without traslation and rotation -class CV_EXPORTS GeneralizedHoughBallard : public GeneralizedHough +/** @brief finds arbitrary template in the grayscale image using Generalized Hough Transform + +Detects position only without translation and rotation @cite Ballard1981 . +*/ +class CV_EXPORTS_W GeneralizedHoughBallard : public GeneralizedHough { public: //! R-Table levels. - virtual void setLevels(int levels) = 0; - virtual int getLevels() const = 0; + CV_WRAP virtual void setLevels(int levels) = 0; + CV_WRAP virtual int getLevels() const = 0; //! The accumulator threshold for the template centers at the detection stage. The smaller it is, the more false positions may be detected. - virtual void setVotesThreshold(int votesThreshold) = 0; - virtual int getVotesThreshold() const = 0; + CV_WRAP virtual void setVotesThreshold(int votesThreshold) = 0; + CV_WRAP virtual int getVotesThreshold() const = 0; }; -//! Guil, N., González-Linares, J.M. and Zapata, E.L. (1999). Bidimensional shape detection using an invariant approach. Pattern Recognition 32 (6): 1025-1038. -//! Detects position, traslation and rotation -class CV_EXPORTS GeneralizedHoughGuil : public GeneralizedHough +/** @brief finds arbitrary template in the grayscale image using Generalized Hough Transform + +Detects position, translation and rotation @cite Guil1999 . +*/ +class CV_EXPORTS_W GeneralizedHoughGuil : public GeneralizedHough { public: //! Angle difference in degrees between two points in feature. - virtual void setXi(double xi) = 0; - virtual double getXi() const = 0; + CV_WRAP virtual void setXi(double xi) = 0; + CV_WRAP virtual double getXi() const = 0; //! Feature table levels. - virtual void setLevels(int levels) = 0; - virtual int getLevels() const = 0; + CV_WRAP virtual void setLevels(int levels) = 0; + CV_WRAP virtual int getLevels() const = 0; //! Maximal difference between angles that treated as equal. - virtual void setAngleEpsilon(double angleEpsilon) = 0; - virtual double getAngleEpsilon() const = 0; + CV_WRAP virtual void setAngleEpsilon(double angleEpsilon) = 0; + CV_WRAP virtual double getAngleEpsilon() const = 0; //! Minimal rotation angle to detect in degrees. - virtual void setMinAngle(double minAngle) = 0; - virtual double getMinAngle() const = 0; + CV_WRAP virtual void setMinAngle(double minAngle) = 0; + CV_WRAP virtual double getMinAngle() const = 0; //! Maximal rotation angle to detect in degrees. - virtual void setMaxAngle(double maxAngle) = 0; - virtual double getMaxAngle() const = 0; + CV_WRAP virtual void setMaxAngle(double maxAngle) = 0; + CV_WRAP virtual double getMaxAngle() const = 0; //! Angle step in degrees. - virtual void setAngleStep(double angleStep) = 0; - virtual double getAngleStep() const = 0; + CV_WRAP virtual void setAngleStep(double angleStep) = 0; + CV_WRAP virtual double getAngleStep() const = 0; //! Angle votes threshold. - virtual void setAngleThresh(int angleThresh) = 0; - virtual int getAngleThresh() const = 0; + CV_WRAP virtual void setAngleThresh(int angleThresh) = 0; + CV_WRAP virtual int getAngleThresh() const = 0; //! Minimal scale to detect. - virtual void setMinScale(double minScale) = 0; - virtual double getMinScale() const = 0; + CV_WRAP virtual void setMinScale(double minScale) = 0; + CV_WRAP virtual double getMinScale() const = 0; //! Maximal scale to detect. - virtual void setMaxScale(double maxScale) = 0; - virtual double getMaxScale() const = 0; + CV_WRAP virtual void setMaxScale(double maxScale) = 0; + CV_WRAP virtual double getMaxScale() const = 0; //! Scale step. - virtual void setScaleStep(double scaleStep) = 0; - virtual double getScaleStep() const = 0; + CV_WRAP virtual void setScaleStep(double scaleStep) = 0; + CV_WRAP virtual double getScaleStep() const = 0; //! Scale votes threshold. - virtual void setScaleThresh(int scaleThresh) = 0; - virtual int getScaleThresh() const = 0; + CV_WRAP virtual void setScaleThresh(int scaleThresh) = 0; + CV_WRAP virtual int getScaleThresh() const = 0; //! Position votes threshold. - virtual void setPosThresh(int posThresh) = 0; - virtual int getPosThresh() const = 0; + CV_WRAP virtual void setPosThresh(int posThresh) = 0; + CV_WRAP virtual int getPosThresh() const = 0; }; +//! @} imgproc_shape + +//! @addtogroup imgproc_hist +//! @{ +/** @brief Base class for Contrast Limited Adaptive Histogram Equalization. +*/ class CV_EXPORTS_W CLAHE : public Algorithm { public: + /** @brief Equalizes the histogram of a grayscale image using Contrast Limited Adaptive Histogram Equalization. + + @param src Source image of type CV_8UC1 or CV_16UC1. + @param dst Destination image. + */ CV_WRAP virtual void apply(InputArray src, OutputArray dst) = 0; + /** @brief Sets threshold for contrast limiting. + + @param clipLimit threshold value. + */ CV_WRAP virtual void setClipLimit(double clipLimit) = 0; + + //! Returns threshold value for contrast limiting. CV_WRAP virtual double getClipLimit() const = 0; + /** @brief Sets size of grid for histogram equalization. Input image will be divided into + equally sized rectangular tiles. + + @param tileGridSize defines the number of tiles in row and column. + */ CV_WRAP virtual void setTilesGridSize(Size tileGridSize) = 0; + + //!@brief Returns Size defines the number of tiles in row and column. CV_WRAP virtual Size getTilesGridSize() const = 0; CV_WRAP virtual void collectGarbage() = 0; }; +//! @} imgproc_hist //! @addtogroup imgproc_subdiv2d //! @{ @@ -913,13 +1010,13 @@ class CV_EXPORTS_W Subdiv2D }; /** creates an empty Subdiv2D object. - To create a new empty Delaunay subdivision you need to use the initDelaunay() function. + To create a new empty Delaunay subdivision you need to use the #initDelaunay function. */ CV_WRAP Subdiv2D(); /** @overload - @param rect – Rectangle that includes all of the 2D points that are to be added to the subdivision. + @param rect Rectangle that includes all of the 2D points that are to be added to the subdivision. The function creates an empty Delaunay subdivision where 2D points can be added using the function insert() . All of the points to be added must be within the specified rectangle, otherwise a runtime @@ -929,14 +1026,14 @@ class CV_EXPORTS_W Subdiv2D /** @brief Creates a new empty Delaunay subdivision - @param rect – Rectangle that includes all of the 2D points that are to be added to the subdivision. + @param rect Rectangle that includes all of the 2D points that are to be added to the subdivision. */ CV_WRAP void initDelaunay(Rect rect); /** @brief Insert a single point into a Delaunay triangulation. - @param pt – Point to insert. + @param pt Point to insert. The function inserts a single point into a subdivision and modifies the subdivision topology appropriately. If a point with the same coordinates exists already, no new point is added. @@ -948,7 +1045,7 @@ class CV_EXPORTS_W Subdiv2D /** @brief Insert multiple points into a Delaunay triangulation. - @param ptvec – Points to insert. + @param ptvec Points to insert. The function inserts a vector of points into a subdivision and modifies the subdivision topology appropriately. @@ -957,30 +1054,30 @@ class CV_EXPORTS_W Subdiv2D /** @brief Returns the location of a point within a Delaunay triangulation. - @param pt – Point to locate. - @param edge – Output edge that the point belongs to or is located to the right of it. - @param vertex – Optional output vertex the input point coincides with. + @param pt Point to locate. + @param edge Output edge that the point belongs to or is located to the right of it. + @param vertex Optional output vertex the input point coincides with. The function locates the input point within the subdivision and gives one of the triangle edges or vertices. @returns an integer which specify one of the following five cases for point location: - - The point falls into some facet. The function returns PTLOC_INSIDE and edge will contain one of + - The point falls into some facet. The function returns #PTLOC_INSIDE and edge will contain one of edges of the facet. - - The point falls onto the edge. The function returns PTLOC_ON_EDGE and edge will contain this edge. - - The point coincides with one of the subdivision vertices. The function returns PTLOC_VERTEX and + - The point falls onto the edge. The function returns #PTLOC_ON_EDGE and edge will contain this edge. + - The point coincides with one of the subdivision vertices. The function returns #PTLOC_VERTEX and vertex will contain a pointer to the vertex. - - The point is outside the subdivision reference rectangle. The function returns PTLOC_OUTSIDE_RECT + - The point is outside the subdivision reference rectangle. The function returns #PTLOC_OUTSIDE_RECT and no pointers are filled. - - One of input arguments is invalid. A runtime error is raised or, if silent or “parent” error - processing mode is selected, CV_PTLOC_ERROR is returnd. + - One of input arguments is invalid. A runtime error is raised or, if silent or "parent" error + processing mode is selected, #PTLOC_ERROR is returned. */ CV_WRAP int locate(Point2f pt, CV_OUT int& edge, CV_OUT int& vertex); /** @brief Finds the subdivision vertex closest to the given point. - @param pt – Input point. - @param nearestPt – Output subdivision vertex point. + @param pt Input point. + @param nearestPt Output subdivision vertex point. The function is another function that locates the input point within the subdivision. It finds the subdivision vertex that is the closest to the input point. It is not necessarily one of vertices @@ -993,27 +1090,35 @@ class CV_EXPORTS_W Subdiv2D /** @brief Returns a list of all edges. - @param edgeList – Output vector. + @param edgeList Output vector. The function gives each edge as a 4 numbers vector, where each two are one of the edge vertices. i.e. org_x = v[0], org_y = v[1], dst_x = v[2], dst_y = v[3]. */ CV_WRAP void getEdgeList(CV_OUT std::vector& edgeList) const; + /** @brief Returns a list of the leading edge ID connected to each triangle. + + @param leadingEdgeList Output vector. + + The function gives one edge ID for each triangle. + */ + CV_WRAP void getLeadingEdgeList(CV_OUT std::vector& leadingEdgeList) const; + /** @brief Returns a list of all triangles. - @param triangleList – Output vector. + @param triangleList Output vector. The function gives each triangle as a 6 numbers vector, where each two are one of the triangle vertices. i.e. p1_x = v[0], p1_y = v[1], p2_x = v[2], p2_y = v[3], p3_x = v[4], p3_y = v[5]. */ CV_WRAP void getTriangleList(CV_OUT std::vector& triangleList) const; - /** @brief Returns a list of all Voroni facets. + /** @brief Returns a list of all Voronoi facets. - @param idx – Vector of vertices IDs to consider. For all vertices you can pass empty vector. - @param facetList – Output vector of the Voroni facets. - @param facetCenters – Output vector of the Voroni facets center points. + @param idx Vector of vertices IDs to consider. For all vertices you can pass empty vector. + @param facetList Output vector of the Voronoi facets. + @param facetCenters Output vector of the Voronoi facets center points. */ CV_WRAP void getVoronoiFacetList(const std::vector& idx, CV_OUT std::vector >& facetList, @@ -1021,8 +1126,8 @@ class CV_EXPORTS_W Subdiv2D /** @brief Returns vertex location from vertex ID. - @param vertex – vertex ID. - @param firstEdge – Optional. The first edge ID which is connected to the vertex. + @param vertex vertex ID. + @param firstEdge Optional. The first edge ID which is connected to the vertex. @returns vertex (x,y) */ @@ -1030,8 +1135,8 @@ class CV_EXPORTS_W Subdiv2D /** @brief Returns one of the edges related to the given edge. - @param edge – Subdivision edge ID. - @param nextEdgeType - Parameter specifying which of the related edges to return. + @param edge Subdivision edge ID. + @param nextEdgeType Parameter specifying which of the related edges to return. The following values are possible: - NEXT_AROUND_ORG next around the edge origin ( eOnext on the picture below if e is the input edge) - NEXT_AROUND_DST next around the edge vertex ( eDnext ) @@ -1050,7 +1155,7 @@ class CV_EXPORTS_W Subdiv2D /** @brief Returns next edge around the edge origin. - @param edge – Subdivision edge ID. + @param edge Subdivision edge ID. @returns an integer which is next edge ID around the edge origin: eOnext on the picture above if e is the input edge). @@ -1059,8 +1164,8 @@ class CV_EXPORTS_W Subdiv2D /** @brief Returns another edge of the same quad-edge. - @param edge – Subdivision edge ID. - @param rotate - Parameter specifying which of the edges of the same quad-edge as the input + @param edge Subdivision edge ID. + @param rotate Parameter specifying which of the edges of the same quad-edge as the input one to return. The following values are possible: - 0 - the input edge ( e on the picture below if e is the input edge) - 1 - the rotated edge ( eRot ) @@ -1074,8 +1179,8 @@ class CV_EXPORTS_W Subdiv2D /** @brief Returns the edge origin. - @param edge – Subdivision edge ID. - @param orgpt – Output vertex location. + @param edge Subdivision edge ID. + @param orgpt Output vertex location. @returns vertex ID. */ @@ -1083,8 +1188,8 @@ class CV_EXPORTS_W Subdiv2D /** @brief Returns the edge destination. - @param edge – Subdivision edge ID. - @param dstpt – Output vertex location. + @param edge Subdivision edge ID. + @param dstpt Output vertex location. @returns vertex ID. */ @@ -1146,13 +1251,12 @@ class CV_EXPORTS_W Subdiv2D //! @addtogroup imgproc_feature //! @{ -/** @example lsd_lines.cpp -An example using the LineSegmentDetector -*/ - /** @brief Line segment detector class following the algorithm described at @cite Rafael12 . + +@note Implementation has been removed due original code license conflict + */ class CV_EXPORTS_W LineSegmentDetector : public Algorithm { @@ -1176,14 +1280,14 @@ class CV_EXPORTS_W LineSegmentDetector : public Algorithm - -1 corresponds to 10 mean false alarms - 0 corresponds to 1 mean false alarm - 1 corresponds to 0.1 mean false alarms - This vector will be calculated only when the objects type is LSD_REFINE_ADV. + This vector will be calculated only when the objects type is #LSD_REFINE_ADV. */ CV_WRAP virtual void detect(InputArray _image, OutputArray _lines, OutputArray width = noArray(), OutputArray prec = noArray(), OutputArray nfa = noArray()) = 0; /** @brief Draws the line segments on a given image. - @param _image The image, where the liens will be drawn. Should be bigger or equal to the image, + @param _image The image, where the lines will be drawn. Should be bigger or equal to the image, where the lines were found. @param lines A vector of the lines that needed to be drawn. */ @@ -1207,15 +1311,17 @@ class CV_EXPORTS_W LineSegmentDetector : public Algorithm The LineSegmentDetector algorithm is defined using the standard values. Only advanced users may want to edit those, as to tailor it for their own application. -@param _refine The way found lines will be refined, see cv::LineSegmentDetectorModes +@param _refine The way found lines will be refined, see #LineSegmentDetectorModes @param _scale The scale of the image that will be used to find the lines. Range (0..1]. @param _sigma_scale Sigma for Gaussian filter. It is computed as sigma = _sigma_scale/_scale. @param _quant Bound to the quantization error on the gradient norm. @param _ang_th Gradient angle tolerance in degrees. -@param _log_eps Detection threshold: -log10(NFA) \> log_eps. Used only when advancent refinement +@param _log_eps Detection threshold: -log10(NFA) \> log_eps. Used only when advance refinement is chosen. @param _density_th Minimal density of aligned region points in the enclosing rectangle. @param _n_bins Number of bins in pseudo-ordering of gradient modulus. + +@note Implementation has been removed due original code license conflict */ CV_EXPORTS_W Ptr createLineSegmentDetector( int _refine = LSD_REFINE_STD, double _scale = 0.8, @@ -1241,7 +1347,7 @@ smoothing kernels (a symmetrical kernel with sum of weights equal to 1) and hand You may also use the higher-level GaussianBlur. @param ksize Aperture size. It should be odd ( \f$\texttt{ksize} \mod 2 = 1\f$ ) and positive. @param sigma Gaussian standard deviation. If it is non-positive, it is computed from ksize as -`sigma = 0.3\*((ksize-1)\*0.5 - 1) + 0.8`. +`sigma = 0.3*((ksize-1)*0.5 - 1) + 0.8`. @param ktype Type of filter coefficients. It can be CV_32F or CV_64F . @sa sepFilter2D, getDerivKernels, getStructuringElement, GaussianBlur */ @@ -1250,14 +1356,14 @@ CV_EXPORTS_W Mat getGaussianKernel( int ksize, double sigma, int ktype = CV_64F /** @brief Returns filter coefficients for computing spatial image derivatives. The function computes and returns the filter coefficients for spatial image derivatives. When -`ksize=CV_SCHARR`, the Scharr \f$3 \times 3\f$ kernels are generated (see cv::Scharr). Otherwise, Sobel -kernels are generated (see cv::Sobel). The filters are normally passed to sepFilter2D or to +`ksize=FILTER_SCHARR`, the Scharr \f$3 \times 3\f$ kernels are generated (see #Scharr). Otherwise, Sobel +kernels are generated (see #Sobel). The filters are normally passed to #sepFilter2D or to @param kx Output matrix of row filter coefficients. It has the type ktype . @param ky Output matrix of column filter coefficients. It has the type ktype . @param dx Derivative order in respect of x. @param dy Derivative order in respect of y. -@param ksize Aperture size. It can be CV_SCHARR, 1, 3, 5, or 7. +@param ksize Aperture size. It can be FILTER_SCHARR, 1, 3, 5, or 7. @param normalize Flag indicating whether to normalize (scale down) the filter coefficients or not. Theoretically, the coefficients should have the denominator \f$=2^{ksize*2-dx-dy-2}\f$. If you are going to filter floating-point images, you are likely to use the normalized kernels. But if you @@ -1290,11 +1396,11 @@ static inline Scalar morphologyDefaultBorderValue() { return Scalar::all(DBL_MAX /** @brief Returns a structuring element of the specified size and shape for morphological operations. -The function constructs and returns the structuring element that can be further passed to cv::erode, -cv::dilate or cv::morphologyEx. But you can also construct an arbitrary binary mask yourself and use it as +The function constructs and returns the structuring element that can be further passed to #erode, +#dilate or #morphologyEx. But you can also construct an arbitrary binary mask yourself and use it as the structuring element. -@param shape Element shape that could be one of cv::MorphShapes +@param shape Element shape that could be one of #MorphShapes @param ksize Size of the structuring element. @param anchor Anchor position within the element. The default value \f$(-1, -1)\f$ means that the anchor is at the center. Note that only the shape of a cross-shaped element depends on the anchor @@ -1303,12 +1409,20 @@ operation is shifted. */ CV_EXPORTS_W Mat getStructuringElement(int shape, Size ksize, Point anchor = Point(-1,-1)); +/** @example samples/cpp/tutorial_code/ImgProc/Smoothing/Smoothing.cpp +Sample code for simple filters +![Sample screenshot](Smoothing_Tutorial_Result_Median_Filter.jpg) +Check @ref tutorial_gausian_median_blur_bilateral_filter "the corresponding tutorial" for more details + */ + /** @brief Blurs an image using the median filter. The function smoothes an image using the median filter with the \f$\texttt{ksize} \times \texttt{ksize}\f$ aperture. Each channel of a multi-channel image is processed independently. In-place operation is supported. +@note The median filter uses #BORDER_REPLICATE internally to cope with border pixels, see #BorderTypes + @param src input 1-, 3-, or 4-channel image; when ksize is 3 or 5, the image depth should be CV_8U, CV_16U, or CV_32F, for larger aperture sizes, it can only be CV_8U. @param dst destination array of the same size and type as src. @@ -1330,10 +1444,10 @@ positive and odd. Or, they can be zero's and then they are computed from sigma. @param sigmaX Gaussian kernel standard deviation in X direction. @param sigmaY Gaussian kernel standard deviation in Y direction; if sigmaY is zero, it is set to be equal to sigmaX, if both sigmas are zeros, they are computed from ksize.width and ksize.height, -respectively (see cv::getGaussianKernel for details); to fully control the result regardless of +respectively (see #getGaussianKernel for details); to fully control the result regardless of possible future modifications of all this semantics, it is recommended to specify all of ksize, sigmaX, and sigmaY. -@param borderType pixel extrapolation method, see cv::BorderTypes +@param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported. @sa sepFilter2D, filter2D, blur, boxFilter, bilateralFilter, medianBlur */ @@ -1367,7 +1481,7 @@ in larger areas of semi-equal color. farther pixels will influence each other as long as their colors are close enough (see sigmaColor ). When d\>0, it specifies the neighborhood size regardless of sigmaSpace. Otherwise, d is proportional to sigmaSpace. -@param borderType border mode used to extrapolate pixels outside of the image, see cv::BorderTypes +@param borderType border mode used to extrapolate pixels outside of the image, see #BorderTypes */ CV_EXPORTS_W void bilateralFilter( InputArray src, OutputArray dst, int d, double sigmaColor, double sigmaSpace, @@ -1375,7 +1489,7 @@ CV_EXPORTS_W void bilateralFilter( InputArray src, OutputArray dst, int d, /** @brief Blurs an image using the box filter. -The function smoothes an image using the kernel: +The function smooths an image using the kernel: \f[\texttt{K} = \alpha \begin{bmatrix} 1 & 1 & 1 & \cdots & 1 & 1 \\ 1 & 1 & 1 & \cdots & 1 & 1 \\ \hdotsfor{6} \\ 1 & 1 & 1 & \cdots & 1 & 1 \end{bmatrix}\f] @@ -1385,7 +1499,7 @@ where Unnormalized box filter is useful for computing various integral characteristics over each pixel neighborhood, such as covariance matrices of image derivatives (used in dense optical flow -algorithms, and so on). If you need to compute pixel sums over variable-size windows, use cv::integral. +algorithms, and so on). If you need to compute pixel sums over variable-size windows, use #integral. @param src input image. @param dst output image of the same size and type as src. @@ -1394,7 +1508,7 @@ algorithms, and so on). If you need to compute pixel sums over variable-size win @param anchor anchor point; default value Point(-1,-1) means that the anchor is at the kernel center. @param normalize flag, specifying whether the kernel is normalized by its area or not. -@param borderType border mode used to extrapolate pixels outside of the image, see cv::BorderTypes +@param borderType border mode used to extrapolate pixels outside of the image, see #BorderTypes. #BORDER_WRAP is not supported. @sa blur, bilateralFilter, GaussianBlur, medianBlur, integral */ CV_EXPORTS_W void boxFilter( InputArray src, OutputArray dst, int ddepth, @@ -1410,24 +1524,24 @@ pixel values which overlap the filter placed over the pixel \f$ (x, y) \f$. The unnormalized square box filter can be useful in computing local image statistics such as the the local variance and standard deviation around the neighborhood of a pixel. -@param _src input image -@param _dst output image of the same size and type as _src +@param src input image +@param dst output image of the same size and type as _src @param ddepth the output image depth (-1 to use src.depth()) @param ksize kernel size @param anchor kernel anchor point. The default value of Point(-1, -1) denotes that the anchor is at the kernel center. @param normalize flag, specifying whether the kernel is to be normalized by it's area or not. -@param borderType border mode used to extrapolate pixels outside of the image, see cv::BorderTypes +@param borderType border mode used to extrapolate pixels outside of the image, see #BorderTypes. #BORDER_WRAP is not supported. @sa boxFilter */ -CV_EXPORTS_W void sqrBoxFilter( InputArray _src, OutputArray _dst, int ddepth, +CV_EXPORTS_W void sqrBoxFilter( InputArray src, OutputArray dst, int ddepth, Size ksize, Point anchor = Point(-1, -1), bool normalize = true, int borderType = BORDER_DEFAULT ); /** @brief Blurs an image using the normalized box filter. -The function smoothes an image using the kernel: +The function smooths an image using the kernel: \f[\texttt{K} = \frac{1}{\texttt{ksize.width*ksize.height}} \begin{bmatrix} 1 & 1 & 1 & \cdots & 1 & 1 \\ 1 & 1 & 1 & \cdots & 1 & 1 \\ \hdotsfor{6} \\ 1 & 1 & 1 & \cdots & 1 & 1 \\ \end{bmatrix}\f] @@ -1440,7 +1554,7 @@ the depth should be CV_8U, CV_16U, CV_16S, CV_32F or CV_64F. @param ksize blurring kernel size. @param anchor anchor point; default value Point(-1,-1) means that the anchor is at the kernel center. -@param borderType border mode used to extrapolate pixels outside of the image, see cv::BorderTypes +@param borderType border mode used to extrapolate pixels outside of the image, see #BorderTypes. #BORDER_WRAP is not supported. @sa boxFilter, bilateralFilter, GaussianBlur, medianBlur */ CV_EXPORTS_W void blur( InputArray src, OutputArray dst, @@ -1458,7 +1572,7 @@ The function does actually compute correlation, not the convolution: \f[\texttt{dst} (x,y) = \sum _{ \stackrel{0\leq x' < \texttt{kernel.cols},}{0\leq y' < \texttt{kernel.rows}} } \texttt{kernel} (x',y')* \texttt{src} (x+x'- \texttt{anchor.x} ,y+y'- \texttt{anchor.y} )\f] That is, the kernel is not mirrored around the anchor point. If you need a real convolution, flip -the kernel using cv::flip and set the new anchor to `(kernel.cols - anchor.x - 1, kernel.rows - +the kernel using #flip and set the new anchor to `(kernel.cols - anchor.x - 1, kernel.rows - anchor.y - 1)`. The function uses the DFT-based algorithm in case of sufficiently large kernels (~`11 x 11` or @@ -1474,7 +1588,7 @@ separate color planes using split and process them individually. the kernel; the anchor should lie within the kernel; default value (-1,-1) means that the anchor is at the kernel center. @param delta optional value added to the filtered pixels before storing them in dst. -@param borderType pixel extrapolation method, see cv::BorderTypes +@param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported. @sa sepFilter2D, dft, matchTemplate */ CV_EXPORTS_W void filter2D( InputArray src, OutputArray dst, int ddepth, @@ -1495,7 +1609,7 @@ kernel kernelY. The final result shifted by delta is stored in dst . @param anchor Anchor position within the kernel. The default value \f$(-1,-1)\f$ means that the anchor is at the kernel center. @param delta Value added to the filtered results before storing them. -@param borderType Pixel extrapolation method, see cv::BorderTypes +@param borderType Pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported. @sa filter2D, Sobel, GaussianBlur, boxFilter, blur */ CV_EXPORTS_W void sepFilter2D( InputArray src, OutputArray dst, int ddepth, @@ -1503,6 +1617,12 @@ CV_EXPORTS_W void sepFilter2D( InputArray src, OutputArray dst, int ddepth, Point anchor = Point(-1,-1), double delta = 0, int borderType = BORDER_DEFAULT ); +/** @example samples/cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp +Sample code using Sobel and/or Scharr OpenCV functions to make a simple Edge Detector +![Sample screenshot](Sobel_Derivatives_Tutorial_Result.jpg) +Check @ref tutorial_sobel_derivatives "the corresponding tutorial" for more details +*/ + /** @brief Calculates the first, second, third, or mixed image derivatives using an extended Sobel operator. In all cases except one, the \f$\texttt{ksize} \times \texttt{ksize}\f$ separable kernel is used to @@ -1510,7 +1630,7 @@ calculate the derivative. When \f$\texttt{ksize = 1}\f$, the \f$3 \times 1\f$ or kernel is used (that is, no Gaussian smoothing is done). `ksize = 1` can only be used for the first or the second x- or y- derivatives. -There is also the special value `ksize = CV_SCHARR (-1)` that corresponds to the \f$3\times3\f$ Scharr +There is also the special value `ksize = #FILTER_SCHARR (-1)` that corresponds to the \f$3\times3\f$ Scharr filter that may give more accurate results than the \f$3\times3\f$ Sobel. The Scharr aperture is \f[\vecthreethree{-3}{0}{3}{-10}{0}{10}{-3}{0}{3}\f] @@ -1540,9 +1660,9 @@ The second case corresponds to a kernel of: @param dy order of the derivative y. @param ksize size of the extended Sobel kernel; it must be 1, 3, 5, or 7. @param scale optional scale factor for the computed derivative values; by default, no scaling is -applied (see cv::getDerivKernels for details). +applied (see #getDerivKernels for details). @param delta optional delta value that is added to the results prior to storing them in dst. -@param borderType pixel extrapolation method, see cv::BorderTypes +@param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported. @sa Scharr, Laplacian, sepFilter2D, filter2D, GaussianBlur, cartToPolar */ CV_EXPORTS_W void Sobel( InputArray src, OutputArray dst, int ddepth, @@ -1563,7 +1683,8 @@ Sobel( src, dy, CV_16SC1, 0, 1, 3 ); @param dx output image with first-order derivative in x. @param dy output image with first-order derivative in y. @param ksize size of Sobel kernel. It must be 3. -@param borderType pixel extrapolation method, see cv::BorderTypes +@param borderType pixel extrapolation method, see #BorderTypes. + Only #BORDER_DEFAULT=#BORDER_REFLECT_101 and #BORDER_REPLICATE are supported. @sa Sobel */ @@ -1581,7 +1702,7 @@ call is equivalent to -\f[\texttt{Sobel(src, dst, ddepth, dx, dy, CV\_SCHARR, scale, delta, borderType)} .\f] +\f[\texttt{Sobel(src, dst, ddepth, dx, dy, FILTER_SCHARR, scale, delta, borderType)} .\f] @param src input image. @param dst output image of the same size and the same number of channels as src. @@ -1589,17 +1710,17 @@ is equivalent to @param dx order of the derivative x. @param dy order of the derivative y. @param scale optional scale factor for the computed derivative values; by default, no scaling is -applied (see getDerivKernels for details). +applied (see #getDerivKernels for details). @param delta optional delta value that is added to the results prior to storing them in dst. -@param borderType pixel extrapolation method, see cv::BorderTypes +@param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported. @sa cartToPolar */ CV_EXPORTS_W void Scharr( InputArray src, OutputArray dst, int ddepth, int dx, int dy, double scale = 1, double delta = 0, int borderType = BORDER_DEFAULT ); -/** @example laplace.cpp - An example using Laplace transformations for edge detection +/** @example samples/cpp/laplace.cpp +An example using Laplace transformations for edge detection */ /** @brief Calculates the Laplacian of an image. @@ -1617,12 +1738,12 @@ with the following \f$3 \times 3\f$ aperture: @param src Source image. @param dst Destination image of the same size and the same number of channels as src . @param ddepth Desired depth of the destination image. -@param ksize Aperture size used to compute the second-derivative filters. See getDerivKernels for +@param ksize Aperture size used to compute the second-derivative filters. See #getDerivKernels for details. The size must be positive and odd. @param scale Optional scale factor for the computed Laplacian values. By default, no scaling is -applied. See getDerivKernels for details. +applied. See #getDerivKernels for details. @param delta Optional delta value that is added to the results prior to storing them in dst . -@param borderType Pixel extrapolation method, see cv::BorderTypes +@param borderType Pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported. @sa Sobel, Scharr */ CV_EXPORTS_W void Laplacian( InputArray src, OutputArray dst, int ddepth, @@ -1634,13 +1755,15 @@ CV_EXPORTS_W void Laplacian( InputArray src, OutputArray dst, int ddepth, //! @addtogroup imgproc_feature //! @{ -/** @example edge.cpp - An example on using the canny edge detector +/** @example samples/cpp/edge.cpp +This program demonstrates usage of the Canny edge detector + +Check @ref tutorial_canny_detector "the corresponding tutorial" for more details */ /** @brief Finds edges in an image using the Canny algorithm @cite Canny86 . -The function finds edges in the input image image and marks them in the output map edges using the +The function finds edges in the input image and marks them in the output map edges using the Canny algorithm. The smallest value between threshold1 and threshold2 is used for edge linking. The largest value is used to find initial segments of strong edges. See @@ -1659,6 +1782,25 @@ CV_EXPORTS_W void Canny( InputArray image, OutputArray edges, double threshold1, double threshold2, int apertureSize = 3, bool L2gradient = false ); +/** \overload + +Finds edges in an image using the Canny algorithm with custom image gradient. + +@param dx 16-bit x derivative of input image (CV_16SC1 or CV_16SC3). +@param dy 16-bit y derivative of input image (same type as dx). +@param edges output edge map; single channels 8-bit image, which has the same size as image . +@param threshold1 first threshold for the hysteresis procedure. +@param threshold2 second threshold for the hysteresis procedure. +@param L2gradient a flag, indicating whether a more accurate \f$L_2\f$ norm +\f$=\sqrt{(dI/dx)^2 + (dI/dy)^2}\f$ should be used to calculate the image gradient magnitude ( +L2gradient=true ), or whether the default \f$L_1\f$ norm \f$=|dI/dx|+|dI/dy|\f$ is enough ( +L2gradient=false ). + */ +CV_EXPORTS_W void Canny( InputArray dx, InputArray dy, + OutputArray edges, + double threshold1, double threshold2, + bool L2gradient = false ); + /** @brief Calculates the minimal eigenvalue of gradient matrices for corner detection. The function is similar to cornerEigenValsAndVecs but it calculates and stores only the minimal @@ -1668,9 +1810,9 @@ of the formulae in the cornerEigenValsAndVecs description. @param src Input single-channel 8-bit or floating-point image. @param dst Image to store the minimal eigenvalues. It has the type CV_32FC1 and the same size as src . -@param blockSize Neighborhood size (see the details on cornerEigenValsAndVecs ). +@param blockSize Neighborhood size (see the details on #cornerEigenValsAndVecs ). @param ksize Aperture parameter for the Sobel operator. -@param borderType Pixel extrapolation method. See cv::BorderTypes. +@param borderType Pixel extrapolation method. See #BorderTypes. #BORDER_WRAP is not supported. */ CV_EXPORTS_W void cornerMinEigenVal( InputArray src, OutputArray dst, int blockSize, int ksize = 3, @@ -1690,10 +1832,10 @@ Corners in the image can be found as the local maxima of this response map. @param src Input single-channel 8-bit or floating-point image. @param dst Image to store the Harris detector responses. It has the type CV_32FC1 and the same size as src . -@param blockSize Neighborhood size (see the details on cornerEigenValsAndVecs ). +@param blockSize Neighborhood size (see the details on #cornerEigenValsAndVecs ). @param ksize Aperture parameter for the Sobel operator. -@param k Harris detector free parameter. See the formula below. -@param borderType Pixel extrapolation method. See cv::BorderTypes. +@param k Harris detector free parameter. See the formula above. +@param borderType Pixel extrapolation method. See #BorderTypes. #BORDER_WRAP is not supported. */ CV_EXPORTS_W void cornerHarris( InputArray src, OutputArray dst, int blockSize, int ksize, double k, @@ -1721,7 +1863,7 @@ The output of the function can be used for robust edge or corner detection. @param dst Image to store the results. It has the same size as src and the type CV_32FC(6) . @param blockSize Neighborhood size (see details below). @param ksize Aperture parameter for the Sobel operator. -@param borderType Pixel extrapolation method. See cv::BorderTypes. +@param borderType Pixel extrapolation method. See #BorderTypes. #BORDER_WRAP is not supported. @sa cornerMinEigenVal, cornerHarris, preCornerDetect */ @@ -1750,7 +1892,7 @@ The corners can be found as local maximums of the functions, as shown below: @param src Source single-channel 8-bit of floating-point image. @param dst Output image that has the type CV_32F and the same size as src . @param ksize %Aperture size of the Sobel . -@param borderType Pixel extrapolation method. See cv::BorderTypes. +@param borderType Pixel extrapolation method. See #BorderTypes. #BORDER_WRAP is not supported. */ CV_EXPORTS_W void preCornerDetect( InputArray src, OutputArray dst, int ksize, int borderType = BORDER_DEFAULT ); @@ -1772,7 +1914,7 @@ where \f${DI_{p_i}}\f$ is an image gradient at one of the points \f$p_i\f$ in a value of \f$q\f$ is to be found so that \f$\epsilon_i\f$ is minimized. A system of equations may be set up with \f$\epsilon_i\f$ set to zero: -\f[\sum _i(DI_{p_i} \cdot {DI_{p_i}}^T) - \sum _i(DI_{p_i} \cdot {DI_{p_i}}^T \cdot p_i)\f] +\f[\sum _i(DI_{p_i} \cdot {DI_{p_i}}^T) \cdot q - \sum _i(DI_{p_i} \cdot {DI_{p_i}}^T \cdot p_i)\f] where the gradients are summed within a neighborhood ("search window") of \f$q\f$ . Calling the first gradient term \f$G\f$ and the second gradient term \f$b\f$ gives: @@ -1782,11 +1924,11 @@ gradient term \f$G\f$ and the second gradient term \f$b\f$ gives: The algorithm sets the center of the neighborhood window at this new center \f$q\f$ and then iterates until the center stays within a set threshold. -@param image Input image. +@param image Input single-channel, 8-bit or float image. @param corners Initial coordinates of the input corners and refined coordinates provided for output. @param winSize Half of the side length of the search window. For example, if winSize=Size(5,5) , -then a \f$5*2+1 \times 5*2+1 = 11 \times 11\f$ search window is used. +then a \f$(5*2+1) \times (5*2+1) = 11 \times 11\f$ search window is used. @param zeroZone Half of the size of the dead region in the middle of the search zone over which the summation in the formula below is not done. It is used sometimes to avoid possible singularities of the autocorrelation matrix. The value of (-1,-1) indicates that there is no such @@ -1805,7 +1947,7 @@ The function finds the most prominent corners in the image or in the specified i described in @cite Shi94 - Function calculates the corner quality measure at every source image pixel using the - cornerMinEigenVal or cornerHarris . + #cornerMinEigenVal or #cornerHarris . - Function performs a non-maximum suppression (the local maximums in *3 x 3* neighborhood are retained). - The corners with the minimal eigenvalue less than @@ -1823,10 +1965,11 @@ with qualityLevel=B . @param image Input 8-bit or floating-point 32-bit, single-channel image. @param corners Output vector of detected corners. @param maxCorners Maximum number of corners to return. If there are more corners than are found, -the strongest of them is returned. +the strongest of them is returned. `maxCorners <= 0` implies that no limit on the maximum is set +and all detected corners are returned. @param qualityLevel Parameter characterizing the minimal accepted quality of image corners. The parameter value is multiplied by the best corner quality measure, which is the minimal eigenvalue -(see cornerMinEigenVal ) or the Harris function response (see cornerHarris ). The corners with the +(see #cornerMinEigenVal ) or the Harris function response (see #cornerHarris ). The corners with the quality measure less than the product are rejected. For example, if the best corner has the quality measure = 1500, and the qualityLevel=0.01 , then all the corners with the quality measure less than 15 are rejected. @@ -1835,19 +1978,26 @@ less than 15 are rejected. CV_8UC1 and the same size as image ), it specifies the region in which the corners are detected. @param blockSize Size of an average block for computing a derivative covariation matrix over each pixel neighborhood. See cornerEigenValsAndVecs . -@param useHarrisDetector Parameter indicating whether to use a Harris detector (see cornerHarris) -or cornerMinEigenVal. +@param useHarrisDetector Parameter indicating whether to use a Harris detector (see #cornerHarris) +or #cornerMinEigenVal. @param k Free parameter of the Harris detector. @sa cornerMinEigenVal, cornerHarris, calcOpticalFlowPyrLK, estimateRigidTransform, */ + CV_EXPORTS_W void goodFeaturesToTrack( InputArray image, OutputArray corners, int maxCorners, double qualityLevel, double minDistance, InputArray mask = noArray(), int blockSize = 3, bool useHarrisDetector = false, double k = 0.04 ); -/** @example houghlines.cpp +CV_EXPORTS_W void goodFeaturesToTrack( InputArray image, OutputArray corners, + int maxCorners, double qualityLevel, double minDistance, + InputArray mask, int blockSize, + int gradientSize, bool useHarrisDetector = false, + double k = 0.04 ); +/** @example samples/cpp/tutorial_code/ImgTrans/houghlines.cpp An example using the Hough line detector +![Sample input image](Hough_Lines_Tutorial_Original_Image.jpg) ![Output image](Hough_Lines_Tutorial_Result.jpg) */ /** @brief Finds lines in a binary image using the standard Hough transform. @@ -1857,10 +2007,11 @@ detection. See for a good ex transform. @param image 8-bit, single-channel binary source image. The image may be modified by the function. -@param lines Output vector of lines. Each line is represented by a two-element vector -\f$(\rho, \theta)\f$ . \f$\rho\f$ is the distance from the coordinate origin \f$(0,0)\f$ (top-left corner of +@param lines Output vector of lines. Each line is represented by a 2 or 3 element vector +\f$(\rho, \theta)\f$ or \f$(\rho, \theta, \textrm{votes})\f$ . \f$\rho\f$ is the distance from the coordinate origin \f$(0,0)\f$ (top-left corner of the image). \f$\theta\f$ is the line rotation angle in radians ( \f$0 \sim \textrm{vertical line}, \pi/2 \sim \textrm{horizontal line}\f$ ). +\f$\textrm{votes}\f$ is the value of accumulator. @param rho Distance resolution of the accumulator in pixels. @param theta Angle resolution of the accumulator in radians. @param threshold Accumulator threshold parameter. Only those lines are returned that get enough @@ -1886,58 +2037,7 @@ The function implements the probabilistic Hough transform algorithm for line det in @cite Matas00 See the line detection example below: - -@code - #include - #include - - using namespace cv; - using namespace std; - - int main(int argc, char** argv) - { - Mat src, dst, color_dst; - if( argc != 2 || !(src=imread(argv[1], 0)).data) - return -1; - - Canny( src, dst, 50, 200, 3 ); - cvtColor( dst, color_dst, COLOR_GRAY2BGR ); - - #if 0 - vector lines; - HoughLines( dst, lines, 1, CV_PI/180, 100 ); - - for( size_t i = 0; i < lines.size(); i++ ) - { - float rho = lines[i][0]; - float theta = lines[i][1]; - double a = cos(theta), b = sin(theta); - double x0 = a*rho, y0 = b*rho; - Point pt1(cvRound(x0 + 1000*(-b)), - cvRound(y0 + 1000*(a))); - Point pt2(cvRound(x0 - 1000*(-b)), - cvRound(y0 - 1000*(a))); - line( color_dst, pt1, pt2, Scalar(0,0,255), 3, 8 ); - } - #else - vector lines; - HoughLinesP( dst, lines, 1, CV_PI/180, 80, 30, 10 ); - for( size_t i = 0; i < lines.size(); i++ ) - { - line( color_dst, Point(lines[i][0], lines[i][1]), - Point(lines[i][2], lines[i][3]), Scalar(0,0,255), 3, 8 ); - } - #endif - namedWindow( "Source", 1 ); - imshow( "Source", src ); - - namedWindow( "Detected Lines", 1 ); - imshow( "Detected Lines", color_dst ); - - waitKey(0); - return 0; - } -@endcode +@include snippets/imgproc_HoughLinesP.cpp This is a sample picture the function parameters have been tuned for: ![image](pics/building.jpg) @@ -1963,7 +2063,28 @@ CV_EXPORTS_W void HoughLinesP( InputArray image, OutputArray lines, double rho, double theta, int threshold, double minLineLength = 0, double maxLineGap = 0 ); -/** @example houghcircles.cpp +/** @brief Finds lines in a set of points using the standard Hough transform. + +The function finds lines in a set of points using a modification of the Hough transform. +@include snippets/imgproc_HoughLinesPointSet.cpp +@param _point Input vector of points. Each vector must be encoded as a Point vector \f$(x,y)\f$. Type must be CV_32FC2 or CV_32SC2. +@param _lines Output vector of found lines. Each vector is encoded as a vector \f$(votes, rho, theta)\f$. +The larger the value of 'votes', the higher the reliability of the Hough line. +@param lines_max Max count of hough lines. +@param threshold Accumulator threshold parameter. Only those lines are returned that get enough +votes ( \f$>\texttt{threshold}\f$ ) +@param min_rho Minimum Distance value of the accumulator in pixels. +@param max_rho Maximum Distance value of the accumulator in pixels. +@param rho_step Distance resolution of the accumulator in pixels. +@param min_theta Minimum angle value of the accumulator in radians. +@param max_theta Maximum angle value of the accumulator in radians. +@param theta_step Angle resolution of the accumulator in radians. + */ +CV_EXPORTS_W void HoughLinesPointSet( InputArray _point, OutputArray _lines, int lines_max, int threshold, + double min_rho, double max_rho, double rho_step, + double min_theta, double max_theta, double theta_step ); + +/** @example samples/cpp/tutorial_code/ImgTrans/houghcircles.cpp An example using the Hough circle detector */ @@ -1972,65 +2093,41 @@ An example using the Hough circle detector The function finds circles in a grayscale image using a modification of the Hough transform. Example: : -@code - #include - #include - #include - - using namespace cv; - using namespace std; - - int main(int argc, char** argv) - { - Mat img, gray; - if( argc != 2 || !(img=imread(argv[1], 1)).data) - return -1; - cvtColor(img, gray, COLOR_BGR2GRAY); - // smooth it, otherwise a lot of false circles may be detected - GaussianBlur( gray, gray, Size(9, 9), 2, 2 ); - vector circles; - HoughCircles(gray, circles, HOUGH_GRADIENT, - 2, gray.rows/4, 200, 100 ); - for( size_t i = 0; i < circles.size(); i++ ) - { - Point center(cvRound(circles[i][0]), cvRound(circles[i][1])); - int radius = cvRound(circles[i][2]); - // draw the circle center - circle( img, center, 3, Scalar(0,255,0), -1, 8, 0 ); - // draw the circle outline - circle( img, center, radius, Scalar(0,0,255), 3, 8, 0 ); - } - namedWindow( "circles", 1 ); - imshow( "circles", img ); - - waitKey(0); - return 0; - } -@endcode +@include snippets/imgproc_HoughLinesCircles.cpp @note Usually the function detects the centers of circles well. However, it may fail to find correct radii. You can assist to the function by specifying the radius range ( minRadius and maxRadius ) if -you know it. Or, you may ignore the returned radius, use only the center, and find the correct -radius using an additional procedure. +you know it. Or, in the case of #HOUGH_GRADIENT method you may set maxRadius to a negative number +to return centers only without radius search, and find the correct radius using an additional procedure. + +It also helps to smooth image a bit unless it's already soft. For example, +GaussianBlur() with 7x7 kernel and 1.5x1.5 sigma or similar blurring may help. @param image 8-bit, single-channel, grayscale input image. -@param circles Output vector of found circles. Each vector is encoded as a 3-element -floating-point vector \f$(x, y, radius)\f$ . -@param method Detection method, see cv::HoughModes. Currently, the only implemented method is HOUGH_GRADIENT +@param circles Output vector of found circles. Each vector is encoded as 3 or 4 element +floating-point vector \f$(x, y, radius)\f$ or \f$(x, y, radius, votes)\f$ . +@param method Detection method, see #HoughModes. The available methods are #HOUGH_GRADIENT and #HOUGH_GRADIENT_ALT. @param dp Inverse ratio of the accumulator resolution to the image resolution. For example, if dp=1 , the accumulator has the same resolution as the input image. If dp=2 , the accumulator has -half as big width and height. +half as big width and height. For #HOUGH_GRADIENT_ALT the recommended value is dp=1.5, +unless some small very circles need to be detected. @param minDist Minimum distance between the centers of the detected circles. If the parameter is too small, multiple neighbor circles may be falsely detected in addition to a true one. If it is too large, some circles may be missed. -@param param1 First method-specific parameter. In case of CV_HOUGH_GRADIENT , it is the higher -threshold of the two passed to the Canny edge detector (the lower one is twice smaller). -@param param2 Second method-specific parameter. In case of CV_HOUGH_GRADIENT , it is the +@param param1 First method-specific parameter. In case of #HOUGH_GRADIENT and #HOUGH_GRADIENT_ALT, +it is the higher threshold of the two passed to the Canny edge detector (the lower one is twice smaller). +Note that #HOUGH_GRADIENT_ALT uses #Scharr algorithm to compute image derivatives, so the threshold value +shough normally be higher, such as 300 or normally exposed and contrasty images. +@param param2 Second method-specific parameter. In case of #HOUGH_GRADIENT, it is the accumulator threshold for the circle centers at the detection stage. The smaller it is, the more false circles may be detected. Circles, corresponding to the larger accumulator values, will be -returned first. +returned first. In the case of #HOUGH_GRADIENT_ALT algorithm, this is the circle "perfectness" measure. +The closer it to 1, the better shaped circles algorithm selects. In most cases 0.9 should be fine. +If you want get better detection of small circles, you may decrease it to 0.85, 0.8 or even less. +But then also try to limit the search range [minRadius, maxRadius] to avoid many false circles. @param minRadius Minimum circle radius. -@param maxRadius Maximum circle radius. +@param maxRadius Maximum circle radius. If <= 0, uses the maximum image dimension. If < 0, #HOUGH_GRADIENT returns +centers without finding the radius. #HOUGH_GRADIENT_ALT always computes circle radiuses. @sa fitEllipse, minEnclosingCircle */ @@ -2044,8 +2141,10 @@ CV_EXPORTS_W void HoughCircles( InputArray image, OutputArray circles, //! @addtogroup imgproc_filter //! @{ -/** @example morphology2.cpp - An example using the morphological operations +/** @example samples/cpp/tutorial_code/ImgProc/Morphology_2.cpp +Advanced morphology Transformations sample code +![Sample screenshot](Morphology_2_Tutorial_Result.jpg) +Check @ref tutorial_opening_closing_hats "the corresponding tutorial" for more details */ /** @brief Erodes an image by using a specific structuring element. @@ -2062,11 +2161,11 @@ case of multi-channel images, each channel is processed independently. CV_8U, CV_16U, CV_16S, CV_32F or CV_64F. @param dst output image of the same size and type as src. @param kernel structuring element used for erosion; if `element=Mat()`, a `3 x 3` rectangular -structuring element is used. Kernel can be created using getStructuringElement. +structuring element is used. Kernel can be created using #getStructuringElement. @param anchor position of the anchor within the element; default value (-1, -1) means that the anchor is at the element center. @param iterations number of times erosion is applied. -@param borderType pixel extrapolation method, see cv::BorderTypes +@param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported. @param borderValue border value in case of a constant border @sa dilate, morphologyEx, getStructuringElement */ @@ -2075,6 +2174,12 @@ CV_EXPORTS_W void erode( InputArray src, OutputArray dst, InputArray kernel, int borderType = BORDER_CONSTANT, const Scalar& borderValue = morphologyDefaultBorderValue() ); +/** @example samples/cpp/tutorial_code/ImgProc/Morphology_1.cpp +Erosion and Dilation sample code +![Sample Screenshot-Erosion](Morphology_1_Tutorial_Erosion_Result.jpg)![Sample Screenshot-Dilation](Morphology_1_Tutorial_Dilation_Result.jpg) +Check @ref tutorial_erosion_dilatation "the corresponding tutorial" for more details +*/ + /** @brief Dilates an image by using a specific structuring element. The function dilates the source image using the specified structuring element that determines the @@ -2086,13 +2191,13 @@ case of multi-channel images, each channel is processed independently. @param src input image; the number of channels can be arbitrary, but the depth should be one of CV_8U, CV_16U, CV_16S, CV_32F or CV_64F. -@param dst output image of the same size and type as src\`. +@param dst output image of the same size and type as src. @param kernel structuring element used for dilation; if elemenat=Mat(), a 3 x 3 rectangular -structuring element is used. Kernel can be created using getStructuringElement +structuring element is used. Kernel can be created using #getStructuringElement @param anchor position of the anchor within the element; default value (-1, -1) means that the anchor is at the element center. @param iterations number of times dilation is applied. -@param borderType pixel extrapolation method, see cv::BorderTypes +@param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not suported. @param borderValue border value in case of a constant border @sa erode, morphologyEx, getStructuringElement */ @@ -2103,7 +2208,7 @@ CV_EXPORTS_W void dilate( InputArray src, OutputArray dst, InputArray kernel, /** @brief Performs advanced morphological transformations. -The function morphologyEx can perform advanced morphological transformations using an erosion and dilation as +The function cv::morphologyEx can perform advanced morphological transformations using an erosion and dilation as basic operations. Any of the operations can be done in-place. In case of multi-channel images, each channel is @@ -2112,15 +2217,18 @@ processed independently. @param src Source image. The number of channels can be arbitrary. The depth should be one of CV_8U, CV_16U, CV_16S, CV_32F or CV_64F. @param dst Destination image of the same size and type as source image. -@param op Type of a morphological operation, see cv::MorphTypes -@param kernel Structuring element. It can be created using cv::getStructuringElement. +@param op Type of a morphological operation, see #MorphTypes +@param kernel Structuring element. It can be created using #getStructuringElement. @param anchor Anchor position with the kernel. Negative values mean that the anchor is at the kernel center. @param iterations Number of times erosion and dilation are applied. -@param borderType Pixel extrapolation method, see cv::BorderTypes +@param borderType Pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported. @param borderValue Border value in case of a constant border. The default value has a special meaning. @sa dilate, erode, getStructuringElement +@note The number of iterations is the number of times erosion or dilatation operation will be applied. +For instance, an opening operation (#MORPH_OPEN) with two iterations is equivalent to apply +successively: erode -> erode -> dilate -> dilate (and not erode -> dilate -> erode -> dilate). */ CV_EXPORTS_W void morphologyEx( InputArray src, OutputArray dst, int op, InputArray kernel, @@ -2149,8 +2257,8 @@ If you want to decimate the image by factor of 2 in each direction, you can call // specify fx and fy and let the function compute the destination image size. resize(src, dst, Size(), 0.5, 0.5, interpolation); @endcode -To shrink an image, it will generally look best with cv::INTER_AREA interpolation, whereas to -enlarge an image, it will generally look best with cv::INTER_CUBIC (slow) or cv::INTER_LINEAR +To shrink an image, it will generally look best with #INTER_AREA interpolation, whereas to +enlarge an image, it will generally look best with c#INTER_CUBIC (slow) or #INTER_LINEAR (faster but still looks OK). @param src input image. @@ -2163,7 +2271,7 @@ src.size(), fx, and fy; the type of dst is the same as of src. \f[\texttt{(double)dsize.width/src.cols}\f] @param fy scale factor along the vertical axis; when it equals 0, it is computed as \f[\texttt{(double)dsize.height/src.rows}\f] -@param interpolation interpolation method, see cv::InterpolationFlags +@param interpolation interpolation method, see #InterpolationFlags @sa warpAffine, warpPerspective, remap */ @@ -2177,19 +2285,19 @@ The function warpAffine transforms the source image using the specified matrix: \f[\texttt{dst} (x,y) = \texttt{src} ( \texttt{M} _{11} x + \texttt{M} _{12} y + \texttt{M} _{13}, \texttt{M} _{21} x + \texttt{M} _{22} y + \texttt{M} _{23})\f] -when the flag WARP_INVERSE_MAP is set. Otherwise, the transformation is first inverted -with cv::invertAffineTransform and then put in the formula above instead of M. The function cannot +when the flag #WARP_INVERSE_MAP is set. Otherwise, the transformation is first inverted +with #invertAffineTransform and then put in the formula above instead of M. The function cannot operate in-place. @param src input image. @param dst output image that has the size dsize and the same type as src . @param M \f$2\times 3\f$ transformation matrix. @param dsize size of the output image. -@param flags combination of interpolation methods (see cv::InterpolationFlags) and the optional -flag WARP_INVERSE_MAP that means that M is the inverse transformation ( +@param flags combination of interpolation methods (see #InterpolationFlags) and the optional +flag #WARP_INVERSE_MAP that means that M is the inverse transformation ( \f$\texttt{dst}\rightarrow\texttt{src}\f$ ). -@param borderMode pixel extrapolation method (see cv::BorderTypes); when -borderMode=BORDER_TRANSPARENT, it means that the pixels in the destination image corresponding to +@param borderMode pixel extrapolation method (see #BorderTypes); when +borderMode=#BORDER_TRANSPARENT, it means that the pixels in the destination image corresponding to the "outliers" in the source image are not modified by the function. @param borderValue value used in case of a constant border; by default, it is 0. @@ -2201,6 +2309,10 @@ CV_EXPORTS_W void warpAffine( InputArray src, OutputArray dst, int borderMode = BORDER_CONSTANT, const Scalar& borderValue = Scalar()); +/** @example samples/cpp/warpPerspective_demo.cpp +An example program shows using cv::findHomography and cv::warpPerspective for image warping +*/ + /** @brief Applies a perspective transformation to an image. The function warpPerspective transforms the source image using the specified matrix: @@ -2208,17 +2320,17 @@ The function warpPerspective transforms the source image using the specified mat \f[\texttt{dst} (x,y) = \texttt{src} \left ( \frac{M_{11} x + M_{12} y + M_{13}}{M_{31} x + M_{32} y + M_{33}} , \frac{M_{21} x + M_{22} y + M_{23}}{M_{31} x + M_{32} y + M_{33}} \right )\f] -when the flag WARP_INVERSE_MAP is set. Otherwise, the transformation is first inverted with invert +when the flag #WARP_INVERSE_MAP is set. Otherwise, the transformation is first inverted with invert and then put in the formula above instead of M. The function cannot operate in-place. @param src input image. @param dst output image that has the size dsize and the same type as src . @param M \f$3\times 3\f$ transformation matrix. @param dsize size of the output image. -@param flags combination of interpolation methods (INTER_LINEAR or INTER_NEAREST) and the -optional flag WARP_INVERSE_MAP, that sets M as the inverse transformation ( +@param flags combination of interpolation methods (#INTER_LINEAR or #INTER_NEAREST) and the +optional flag #WARP_INVERSE_MAP, that sets M as the inverse transformation ( \f$\texttt{dst}\rightarrow\texttt{src}\f$ ). -@param borderMode pixel extrapolation method (BORDER_CONSTANT or BORDER_REPLICATE). +@param borderMode pixel extrapolation method (#BORDER_CONSTANT or #BORDER_REPLICATE). @param borderValue value used in case of a constant border; by default, it equals 0. @sa warpAffine, resize, remap, getRectSubPix, perspectiveTransform @@ -2252,12 +2364,14 @@ CV_32FC1, or CV_32FC2. See convertMaps for details on converting a floating poin representation to fixed-point for speed. @param map2 The second map of y values having the type CV_16UC1, CV_32FC1, or none (empty map if map1 is (x,y) points), respectively. -@param interpolation Interpolation method (see cv::InterpolationFlags). The method INTER_AREA is +@param interpolation Interpolation method (see #InterpolationFlags). The method #INTER_AREA is not supported by this function. -@param borderMode Pixel extrapolation method (see cv::BorderTypes). When -borderMode=BORDER_TRANSPARENT, it means that the pixels in the destination image that +@param borderMode Pixel extrapolation method (see #BorderTypes). When +borderMode=#BORDER_TRANSPARENT, it means that the pixels in the destination image that corresponds to the "outliers" in the source image are not modified by the function. @param borderValue Value used in case of a constant border. By default, it is 0. +@note +Due to current implementation limitations the size of an input and output images should be less than 32767x32767. */ CV_EXPORTS_W void remap( InputArray src, OutputArray dst, InputArray map1, InputArray map2, @@ -2270,13 +2384,13 @@ The function converts a pair of maps for remap from one representation to anothe options ( (map1.type(), map2.type()) \f$\rightarrow\f$ (dstmap1.type(), dstmap2.type()) ) are supported: -- \f$\texttt{(CV\_32FC1, CV\_32FC1)} \rightarrow \texttt{(CV\_16SC2, CV\_16UC1)}\f$. This is the +- \f$\texttt{(CV_32FC1, CV_32FC1)} \rightarrow \texttt{(CV_16SC2, CV_16UC1)}\f$. This is the most frequently used conversion operation, in which the original floating-point maps (see remap ) are converted to a more compact and much faster fixed-point representation. The first output array contains the rounded coordinates and the second array (created only when nninterpolation=false ) contains indices in the interpolation tables. -- \f$\texttt{(CV\_32FC2)} \rightarrow \texttt{(CV\_16SC2, CV\_16UC1)}\f$. The same as above but +- \f$\texttt{(CV_32FC2)} \rightarrow \texttt{(CV_16SC2, CV_16UC1)}\f$. The same as above but the original maps are stored in one 2-channel matrix. - Reverse conversion. Obviously, the reconstructed floating-point maps will not be exactly the same @@ -2317,16 +2431,22 @@ coordinate origin is assumed to be the top-left corner). @sa getAffineTransform, warpAffine, transform */ -CV_EXPORTS_W Mat getRotationMatrix2D( Point2f center, double angle, double scale ); +CV_EXPORTS_W Mat getRotationMatrix2D(Point2f center, double angle, double scale); -//! returns 3x3 perspective transformation for the corresponding 4 point pairs. -CV_EXPORTS Mat getPerspectiveTransform( const Point2f src[], const Point2f dst[] ); +/** @sa getRotationMatrix2D */ +CV_EXPORTS Matx23d getRotationMatrix2D_(Point2f center, double angle, double scale); + +inline +Mat getRotationMatrix2D(Point2f center, double angle, double scale) +{ + return Mat(getRotationMatrix2D_(center, angle, scale), true); +} /** @brief Calculates an affine transform from three pairs of the corresponding points. The function calculates the \f$2 \times 3\f$ matrix of an affine transform so that: -\f[\begin{bmatrix} x'_i \\ y'_i \end{bmatrix} = \texttt{map\_matrix} \cdot \begin{bmatrix} x_i \\ y_i \\ 1 \end{bmatrix}\f] +\f[\begin{bmatrix} x'_i \\ y'_i \end{bmatrix} = \texttt{map_matrix} \cdot \begin{bmatrix} x_i \\ y_i \\ 1 \end{bmatrix}\f] where @@ -2356,7 +2476,7 @@ CV_EXPORTS_W void invertAffineTransform( InputArray M, OutputArray iM ); The function calculates the \f$3 \times 3\f$ matrix of a perspective transform so that: -\f[\begin{bmatrix} t_i x'_i \\ t_i y'_i \\ t_i \end{bmatrix} = \texttt{map\_matrix} \cdot \begin{bmatrix} x_i \\ y_i \\ 1 \end{bmatrix}\f] +\f[\begin{bmatrix} t_i x'_i \\ t_i y'_i \\ t_i \end{bmatrix} = \texttt{map_matrix} \cdot \begin{bmatrix} x_i \\ y_i \\ 1 \end{bmatrix}\f] where @@ -2364,10 +2484,15 @@ where @param src Coordinates of quadrangle vertices in the source image. @param dst Coordinates of the corresponding quadrangle vertices in the destination image. +@param solveMethod method passed to cv::solve (#DecompTypes) @sa findHomography, warpPerspective, perspectiveTransform */ -CV_EXPORTS_W Mat getPerspectiveTransform( InputArray src, InputArray dst ); +CV_EXPORTS_W Mat getPerspectiveTransform(InputArray src, InputArray dst, int solveMethod = DECOMP_LU); + +/** @overload */ +CV_EXPORTS Mat getPerspectiveTransform(const Point2f src[], const Point2f dst[], int solveMethod = DECOMP_LU); + CV_EXPORTS_W Mat getAffineTransform( InputArray src, InputArray dst ); @@ -2375,13 +2500,12 @@ CV_EXPORTS_W Mat getAffineTransform( InputArray src, InputArray dst ); The function getRectSubPix extracts pixels from src: -\f[dst(x, y) = src(x + \texttt{center.x} - ( \texttt{dst.cols} -1)*0.5, y + \texttt{center.y} - ( \texttt{dst.rows} -1)*0.5)\f] +\f[patch(x, y) = src(x + \texttt{center.x} - ( \texttt{dst.cols} -1)*0.5, y + \texttt{center.y} - ( \texttt{dst.rows} -1)*0.5)\f] where the values of the pixels at non-integer coordinates are retrieved using bilinear -interpolation. Every channel of multi-channel images is processed independently. While the center of -the rectangle must be inside the image, parts of the rectangle may be outside. In this case, the -replication border mode (see cv::BorderTypes) is used to extrapolate the pixel values outside of -the image. +interpolation. Every channel of multi-channel images is processed independently. Also +the image should be a single channel or three channel image. While the center of the +rectangle must be inside the image, parts of the rectangle may be outside. @param image Source image. @param patchSize Size of the extracted patch. @@ -2395,48 +2519,185 @@ source image. The center must be inside the image. CV_EXPORTS_W void getRectSubPix( InputArray image, Size patchSize, Point2f center, OutputArray patch, int patchType = -1 ); -/** @example polar_transforms.cpp +/** @example samples/cpp/polar_transforms.cpp An example using the cv::linearPolar and cv::logPolar operations */ -/** @brief Remaps an image to log-polar space. +/** @brief Remaps an image to semilog-polar coordinates space. + +@deprecated This function produces same result as cv::warpPolar(src, dst, src.size(), center, maxRadius, flags+WARP_POLAR_LOG); + +@internal +Transform the source image using the following transformation (See @ref polar_remaps_reference_image "Polar remaps reference image d)"): +\f[\begin{array}{l} + dst( \rho , \phi ) = src(x,y) \\ + dst.size() \leftarrow src.size() +\end{array}\f] -transforms the source image using the following transformation: -\f[dst( \phi , \rho ) = src(x,y)\f] where -\f[\rho = M \cdot \log{\sqrt{x^2 + y^2}} , \phi =atan(y/x)\f] +\f[\begin{array}{l} + I = (dx,dy) = (x - center.x,y - center.y) \\ + \rho = M \cdot log_e(\texttt{magnitude} (I)) ,\\ + \phi = Kangle \cdot \texttt{angle} (I) \\ +\end{array}\f] -The function emulates the human "foveal" vision and can be used for fast scale and -rotation-invariant template matching, for object tracking and so forth. The function can not operate -in-place. +and +\f[\begin{array}{l} + M = src.cols / log_e(maxRadius) \\ + Kangle = src.rows / 2\Pi \\ +\end{array}\f] +The function emulates the human "foveal" vision and can be used for fast scale and +rotation-invariant template matching, for object tracking and so forth. @param src Source image -@param dst Destination image +@param dst Destination image. It will have same size and type as src. @param center The transformation center; where the output precision is maximal -@param M Magnitude scale parameter. -@param flags A combination of interpolation methods, see cv::InterpolationFlags - */ +@param M Magnitude scale parameter. It determines the radius of the bounding circle to transform too. +@param flags A combination of interpolation methods, see #InterpolationFlags + +@note +- The function can not operate in-place. +- To calculate magnitude and angle in degrees #cartToPolar is used internally thus angles are measured from 0 to 360 with accuracy about 0.3 degrees. + +@sa cv::linearPolar +@endinternal +*/ CV_EXPORTS_W void logPolar( InputArray src, OutputArray dst, Point2f center, double M, int flags ); -/** @brief Remaps an image to polar space. +/** @brief Remaps an image to polar coordinates space. + +@deprecated This function produces same result as cv::warpPolar(src, dst, src.size(), center, maxRadius, flags) + +@internal +Transform the source image using the following transformation (See @ref polar_remaps_reference_image "Polar remaps reference image c)"): +\f[\begin{array}{l} + dst( \rho , \phi ) = src(x,y) \\ + dst.size() \leftarrow src.size() +\end{array}\f] -transforms the source image using the following transformation: -\f[dst( \phi , \rho ) = src(x,y)\f] where -\f[\rho = (src.width/maxRadius) \cdot \sqrt{x^2 + y^2} , \phi =atan(y/x)\f] +\f[\begin{array}{l} + I = (dx,dy) = (x - center.x,y - center.y) \\ + \rho = Kmag \cdot \texttt{magnitude} (I) ,\\ + \phi = angle \cdot \texttt{angle} (I) +\end{array}\f] + +and +\f[\begin{array}{l} + Kx = src.cols / maxRadius \\ + Ky = src.rows / 2\Pi +\end{array}\f] -The function can not operate in-place. @param src Source image -@param dst Destination image +@param dst Destination image. It will have same size and type as src. @param center The transformation center; -@param maxRadius Inverse magnitude scale parameter -@param flags A combination of interpolation methods, see cv::InterpolationFlags - */ +@param maxRadius The radius of the bounding circle to transform. It determines the inverse magnitude scale parameter too. +@param flags A combination of interpolation methods, see #InterpolationFlags + +@note +- The function can not operate in-place. +- To calculate magnitude and angle in degrees #cartToPolar is used internally thus angles are measured from 0 to 360 with accuracy about 0.3 degrees. + +@sa cv::logPolar +@endinternal +*/ CV_EXPORTS_W void linearPolar( InputArray src, OutputArray dst, Point2f center, double maxRadius, int flags ); + +/** \brief Remaps an image to polar or semilog-polar coordinates space + +@anchor polar_remaps_reference_image +![Polar remaps reference](pics/polar_remap_doc.png) + +Transform the source image using the following transformation: +\f[ +dst(\rho , \phi ) = src(x,y) +\f] + +where +\f[ +\begin{array}{l} +\vec{I} = (x - center.x, \;y - center.y) \\ +\phi = Kangle \cdot \texttt{angle} (\vec{I}) \\ +\rho = \left\{\begin{matrix} +Klin \cdot \texttt{magnitude} (\vec{I}) & default \\ +Klog \cdot log_e(\texttt{magnitude} (\vec{I})) & if \; semilog \\ +\end{matrix}\right. +\end{array} +\f] + +and +\f[ +\begin{array}{l} +Kangle = dsize.height / 2\Pi \\ +Klin = dsize.width / maxRadius \\ +Klog = dsize.width / log_e(maxRadius) \\ +\end{array} +\f] + + +\par Linear vs semilog mapping + +Polar mapping can be linear or semi-log. Add one of #WarpPolarMode to `flags` to specify the polar mapping mode. + +Linear is the default mode. + +The semilog mapping emulates the human "foveal" vision that permit very high acuity on the line of sight (central vision) +in contrast to peripheral vision where acuity is minor. + +\par Option on `dsize`: + +- if both values in `dsize <=0 ` (default), +the destination image will have (almost) same area of source bounding circle: +\f[\begin{array}{l} +dsize.area \leftarrow (maxRadius^2 \cdot \Pi) \\ +dsize.width = \texttt{cvRound}(maxRadius) \\ +dsize.height = \texttt{cvRound}(maxRadius \cdot \Pi) \\ +\end{array}\f] + + +- if only `dsize.height <= 0`, +the destination image area will be proportional to the bounding circle area but scaled by `Kx * Kx`: +\f[\begin{array}{l} +dsize.height = \texttt{cvRound}(dsize.width \cdot \Pi) \\ +\end{array} +\f] + +- if both values in `dsize > 0 `, +the destination image will have the given size therefore the area of the bounding circle will be scaled to `dsize`. + + +\par Reverse mapping + +You can get reverse mapping adding #WARP_INVERSE_MAP to `flags` +\snippet polar_transforms.cpp InverseMap + +In addiction, to calculate the original coordinate from a polar mapped coordinate \f$(rho, phi)->(x, y)\f$: +\snippet polar_transforms.cpp InverseCoordinate + +@param src Source image. +@param dst Destination image. It will have same type as src. +@param dsize The destination image size (see description for valid options). +@param center The transformation center. +@param maxRadius The radius of the bounding circle to transform. It determines the inverse magnitude scale parameter too. +@param flags A combination of interpolation methods, #InterpolationFlags + #WarpPolarMode. + - Add #WARP_POLAR_LINEAR to select linear polar mapping (default) + - Add #WARP_POLAR_LOG to select semilog polar mapping + - Add #WARP_INVERSE_MAP for reverse mapping. +@note +- The function can not operate in-place. +- To calculate magnitude and angle in degrees #cartToPolar is used internally thus angles are measured from 0 to 360 with accuracy about 0.3 degrees. +- This function uses #remap. Due to current implementation limitations the size of an input and output images should be less than 32767x32767. + +@sa cv::remap +*/ +CV_EXPORTS_W void warpPolar(InputArray src, OutputArray dst, Size dsize, + Point2f center, double maxRadius, int flags); + + //! @} imgproc_transform //! @addtogroup imgproc_misc @@ -2451,7 +2712,7 @@ CV_EXPORTS_AS(integral2) void integral( InputArray src, OutputArray sum, /** @brief Calculates the integral of an image. -The functions calculate one or more integral images for the source image as follows: +The function calculates one or more integral images for the source image as follows: \f[\texttt{sum} (X,Y) = \sum _{x 1) @param type Created array type */ CV_EXPORTS_W void createHanningWindow(OutputArray dst, Size winSize, int type); @@ -2634,24 +2894,25 @@ CV_EXPORTS_W void createHanningWindow(OutputArray dst, Size winSize, int type); /** @brief Applies a fixed-level threshold to each array element. -The function applies fixed-level thresholding to a single-channel array. The function is typically -used to get a bi-level (binary) image out of a grayscale image ( cv::compare could be also used for +The function applies fixed-level thresholding to a multiple-channel array. The function is typically +used to get a bi-level (binary) image out of a grayscale image ( #compare could be also used for this purpose) or for removing a noise, that is, filtering out pixels with too small or too large values. There are several types of thresholding supported by the function. They are determined by type parameter. -Also, the special values cv::THRESH_OTSU or cv::THRESH_TRIANGLE may be combined with one of the +Also, the special values #THRESH_OTSU or #THRESH_TRIANGLE may be combined with one of the above values. In these cases, the function determines the optimal threshold value using the Otsu's -or Triangle algorithm and uses it instead of the specified thresh . The function returns the -computed threshold value. Currently, the Otsu's and Triangle methods are implemented only for 8-bit -images. +or Triangle algorithm and uses it instead of the specified thresh. + +@note Currently, the Otsu's and Triangle methods are implemented only for 8-bit single-channel images. -@param src input array (single-channel, 8-bit or 32-bit floating point). -@param dst output array of the same size and type as src. +@param src input array (multiple-channel, 8-bit or 32-bit floating point). +@param dst output array of the same size and type and the same number of channels as src. @param thresh threshold value. -@param maxval maximum value to use with the THRESH_BINARY and THRESH_BINARY_INV thresholding +@param maxval maximum value to use with the #THRESH_BINARY and #THRESH_BINARY_INV thresholding types. -@param type thresholding type (see the cv::ThresholdTypes). +@param type thresholding type (see #ThresholdTypes). +@return the computed threshold value if Otsu's or Triangle methods used. @sa adaptiveThreshold, findContours, compare, min, max */ @@ -2673,9 +2934,10 @@ The function can process the image in-place. @param src Source 8-bit single-channel image. @param dst Destination image of the same size and the same type as src. @param maxValue Non-zero value assigned to the pixels for which the condition is satisfied -@param adaptiveMethod Adaptive thresholding algorithm to use, see cv::AdaptiveThresholdTypes -@param thresholdType Thresholding type that must be either THRESH_BINARY or THRESH_BINARY_INV, -see cv::ThresholdTypes. +@param adaptiveMethod Adaptive thresholding algorithm to use, see #AdaptiveThresholdTypes. +The #BORDER_REPLICATE | #BORDER_ISOLATED is used to process boundaries. +@param thresholdType Thresholding type that must be either #THRESH_BINARY or #THRESH_BINARY_INV, +see #ThresholdTypes. @param blockSize Size of a pixel neighborhood that is used to calculate a threshold value for the pixel: 3, 5, 7, and so on. @param C Constant subtracted from the mean or weighted mean (see the details below). Normally, it @@ -2692,6 +2954,10 @@ CV_EXPORTS_W void adaptiveThreshold( InputArray src, OutputArray dst, //! @addtogroup imgproc_filter //! @{ +/** @example samples/cpp/tutorial_code/ImgProc/Pyramids/Pyramids.cpp +An example using pyrDown and pyrUp functions +*/ + /** @brief Blurs an image and downsamples it. By default, size of the output image is computed as `Size((src.cols+1)/2, (src.rows+1)/2)`, but in @@ -2709,7 +2975,7 @@ Then, it downsamples the image by rejecting even rows and columns. @param src input image. @param dst output image; it has the specified size and the same type as src. @param dstsize size of the output image. -@param borderType Pixel extrapolation method, see cv::BorderTypes (BORDER_CONSTANT isn't supported) +@param borderType Pixel extrapolation method, see #BorderTypes (#BORDER_CONSTANT isn't supported) */ CV_EXPORTS_W void pyrDown( InputArray src, OutputArray dst, const Size& dstsize = Size(), int borderType = BORDER_DEFAULT ); @@ -2729,7 +2995,7 @@ pyrDown multiplied by 4. @param src input image. @param dst output image. It has the specified size and the same type as src . @param dstsize size of the output image. -@param borderType Pixel extrapolation method, see cv::BorderTypes (only BORDER_DEFAULT is supported) +@param borderType Pixel extrapolation method, see #BorderTypes (only #BORDER_DEFAULT is supported) */ CV_EXPORTS_W void pyrUp( InputArray src, OutputArray dst, const Size& dstsize = Size(), int borderType = BORDER_DEFAULT ); @@ -2743,261 +3009,28 @@ pyrDown to the previously built pyramid layers, starting from `dst[0]==src`. @param dst Destination vector of maxlevel+1 images of the same type as src. dst[0] will be the same as src. dst[1] is the next pyramid layer, a smoothed and down-sized src, and so on. @param maxlevel 0-based index of the last (the smallest) pyramid layer. It must be non-negative. -@param borderType Pixel extrapolation method, see cv::BorderTypes (BORDER_CONSTANT isn't supported) +@param borderType Pixel extrapolation method, see #BorderTypes (#BORDER_CONSTANT isn't supported) */ CV_EXPORTS void buildPyramid( InputArray src, OutputArrayOfArrays dst, int maxlevel, int borderType = BORDER_DEFAULT ); //! @} imgproc_filter -//! @addtogroup imgproc_transform -//! @{ - -/** @brief Transforms an image to compensate for lens distortion. - -The function transforms an image to compensate radial and tangential lens distortion. - -The function is simply a combination of cv::initUndistortRectifyMap (with unity R ) and cv::remap -(with bilinear interpolation). See the former function for details of the transformation being -performed. - -Those pixels in the destination image, for which there is no correspondent pixels in the source -image, are filled with zeros (black color). - -A particular subset of the source image that will be visible in the corrected image can be regulated -by newCameraMatrix. You can use cv::getOptimalNewCameraMatrix to compute the appropriate -newCameraMatrix depending on your requirements. - -The camera matrix and the distortion parameters can be determined using cv::calibrateCamera. If -the resolution of images is different from the resolution used at the calibration stage, \f$f_x, -f_y, c_x\f$ and \f$c_y\f$ need to be scaled accordingly, while the distortion coefficients remain -the same. - -@param src Input (distorted) image. -@param dst Output (corrected) image that has the same size and type as src . -@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . -@param distCoeffs Input vector of distortion coefficients -\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6[, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ -of 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are assumed. -@param newCameraMatrix Camera matrix of the distorted image. By default, it is the same as -cameraMatrix but you may additionally scale and shift the result by using a different matrix. - */ -CV_EXPORTS_W void undistort( InputArray src, OutputArray dst, - InputArray cameraMatrix, - InputArray distCoeffs, - InputArray newCameraMatrix = noArray() ); - -/** @brief Computes the undistortion and rectification transformation map. - -The function computes the joint undistortion and rectification transformation and represents the -result in the form of maps for remap. The undistorted image looks like original, as if it is -captured with a camera using the camera matrix =newCameraMatrix and zero distortion. In case of a -monocular camera, newCameraMatrix is usually equal to cameraMatrix, or it can be computed by -cv::getOptimalNewCameraMatrix for a better control over scaling. In case of a stereo camera, -newCameraMatrix is normally set to P1 or P2 computed by cv::stereoRectify . - -Also, this new camera is oriented differently in the coordinate space, according to R. That, for -example, helps to align two heads of a stereo camera so that the epipolar lines on both images -become horizontal and have the same y- coordinate (in case of a horizontally aligned stereo camera). - -The function actually builds the maps for the inverse mapping algorithm that is used by remap. That -is, for each pixel \f$(u, v)\f$ in the destination (corrected and rectified) image, the function -computes the corresponding coordinates in the source image (that is, in the original image from -camera). The following process is applied: -\f[ -\begin{array}{l} -x \leftarrow (u - {c'}_x)/{f'}_x \\ -y \leftarrow (v - {c'}_y)/{f'}_y \\ -{[X\,Y\,W]} ^T \leftarrow R^{-1}*[x \, y \, 1]^T \\ -x' \leftarrow X/W \\ -y' \leftarrow Y/W \\ -r^2 \leftarrow x'^2 + y'^2 \\ -x'' \leftarrow x' \frac{1 + k_1 r^2 + k_2 r^4 + k_3 r^6}{1 + k_4 r^2 + k_5 r^4 + k_6 r^6} -+ 2p_1 x' y' + p_2(r^2 + 2 x'^2) + s_1 r^2 + s_2 r^4\\ -y'' \leftarrow y' \frac{1 + k_1 r^2 + k_2 r^4 + k_3 r^6}{1 + k_4 r^2 + k_5 r^4 + k_6 r^6} -+ p_1 (r^2 + 2 y'^2) + 2 p_2 x' y' + s_3 r^2 + s_4 r^4 \\ -s\vecthree{x'''}{y'''}{1} = -\vecthreethree{R_{33}(\tau_x, \tau_y)}{0}{-R_{13}((\tau_x, \tau_y)} -{0}{R_{33}(\tau_x, \tau_y)}{-R_{23}(\tau_x, \tau_y)} -{0}{0}{1} R(\tau_x, \tau_y) \vecthree{x''}{y''}{1}\\ -map_x(u,v) \leftarrow x''' f_x + c_x \\ -map_y(u,v) \leftarrow y''' f_y + c_y -\end{array} -\f] -where \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6[, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ -are the distortion coefficients. - -In case of a stereo camera, this function is called twice: once for each camera head, after -stereoRectify, which in its turn is called after cv::stereoCalibrate. But if the stereo camera -was not calibrated, it is still possible to compute the rectification transformations directly from -the fundamental matrix using cv::stereoRectifyUncalibrated. For each camera, the function computes -homography H as the rectification transformation in a pixel domain, not a rotation matrix R in 3D -space. R can be computed from H as -\f[\texttt{R} = \texttt{cameraMatrix} ^{-1} \cdot \texttt{H} \cdot \texttt{cameraMatrix}\f] -where cameraMatrix can be chosen arbitrarily. - -@param cameraMatrix Input camera matrix \f$A=\vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . -@param distCoeffs Input vector of distortion coefficients -\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6[, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ -of 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are assumed. -@param R Optional rectification transformation in the object space (3x3 matrix). R1 or R2 , -computed by stereoRectify can be passed here. If the matrix is empty, the identity transformation -is assumed. In cvInitUndistortMap R assumed to be an identity matrix. -@param newCameraMatrix New camera matrix \f$A'=\vecthreethree{f_x'}{0}{c_x'}{0}{f_y'}{c_y'}{0}{0}{1}\f$. -@param size Undistorted image size. -@param m1type Type of the first output map that can be CV_32FC1 or CV_16SC2, see cv::convertMaps -@param map1 The first output map. -@param map2 The second output map. - */ -CV_EXPORTS_W void initUndistortRectifyMap( InputArray cameraMatrix, InputArray distCoeffs, - InputArray R, InputArray newCameraMatrix, - Size size, int m1type, OutputArray map1, OutputArray map2 ); - -//! initializes maps for cv::remap() for wide-angle -CV_EXPORTS_W float initWideAngleProjMap( InputArray cameraMatrix, InputArray distCoeffs, - Size imageSize, int destImageWidth, - int m1type, OutputArray map1, OutputArray map2, - int projType = PROJ_SPHERICAL_EQRECT, double alpha = 0); - -/** @brief Returns the default new camera matrix. - -The function returns the camera matrix that is either an exact copy of the input cameraMatrix (when -centerPrinicipalPoint=false ), or the modified one (when centerPrincipalPoint=true). - -In the latter case, the new camera matrix will be: - -\f[\begin{bmatrix} f_x && 0 && ( \texttt{imgSize.width} -1)*0.5 \\ 0 && f_y && ( \texttt{imgSize.height} -1)*0.5 \\ 0 && 0 && 1 \end{bmatrix} ,\f] - -where \f$f_x\f$ and \f$f_y\f$ are \f$(0,0)\f$ and \f$(1,1)\f$ elements of cameraMatrix, respectively. - -By default, the undistortion functions in OpenCV (see initUndistortRectifyMap, undistort) do not -move the principal point. However, when you work with stereo, it is important to move the principal -points in both views to the same y-coordinate (which is required by most of stereo correspondence -algorithms), and may be to the same x-coordinate too. So, you can form the new camera matrix for -each view where the principal points are located at the center. - -@param cameraMatrix Input camera matrix. -@param imgsize Camera view image size in pixels. -@param centerPrincipalPoint Location of the principal point in the new camera matrix. The -parameter indicates whether this location should be at the image center or not. - */ -CV_EXPORTS_W Mat getDefaultNewCameraMatrix( InputArray cameraMatrix, Size imgsize = Size(), - bool centerPrincipalPoint = false ); - -/** @brief Computes the ideal point coordinates from the observed point coordinates. - -The function is similar to cv::undistort and cv::initUndistortRectifyMap but it operates on a -sparse set of points instead of a raster image. Also the function performs a reverse transformation -to projectPoints. In case of a 3D object, it does not reconstruct its 3D coordinates, but for a -planar object, it does, up to a translation vector, if the proper R is specified. -@code - // (u,v) is the input point, (u', v') is the output point - // camera_matrix=[fx 0 cx; 0 fy cy; 0 0 1] - // P=[fx' 0 cx' tx; 0 fy' cy' ty; 0 0 1 tz] - x" = (u - cx)/fx - y" = (v - cy)/fy - (x',y') = undistort(x",y",dist_coeffs) - [X,Y,W]T = R*[x' y' 1]T - x = X/W, y = Y/W - // only performed if P=[fx' 0 cx' [tx]; 0 fy' cy' [ty]; 0 0 1 [tz]] is specified - u' = x*fx' + cx' - v' = y*fy' + cy', -@endcode -where cv::undistort is an approximate iterative algorithm that estimates the normalized original -point coordinates out of the normalized distorted point coordinates ("normalized" means that the -coordinates do not depend on the camera matrix). - -The function can be used for both a stereo camera head or a monocular camera (when R is empty). - -@param src Observed point coordinates, 1xN or Nx1 2-channel (CV_32FC2 or CV_64FC2). -@param dst Output ideal point coordinates after undistortion and reverse perspective -transformation. If matrix P is identity or omitted, dst will contain normalized point coordinates. -@param cameraMatrix Camera matrix \f$\vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . -@param distCoeffs Input vector of distortion coefficients -\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6[, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ -of 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are assumed. -@param R Rectification transformation in the object space (3x3 matrix). R1 or R2 computed by -cv::stereoRectify can be passed here. If the matrix is empty, the identity transformation is used. -@param P New camera matrix (3x3) or new projection matrix (3x4). P1 or P2 computed by -cv::stereoRectify can be passed here. If the matrix is empty, the identity new camera matrix is used. - */ -CV_EXPORTS_W void undistortPoints( InputArray src, OutputArray dst, - InputArray cameraMatrix, InputArray distCoeffs, - InputArray R = noArray(), InputArray P = noArray()); - -//! @} imgproc_transform - //! @addtogroup imgproc_hist //! @{ -/** @example demhist.cpp +/** @example samples/cpp/demhist.cpp An example for creating histograms of an image */ /** @brief Calculates a histogram of a set of arrays. -The functions calcHist calculate the histogram of one or more arrays. The elements of a tuple used +The function cv::calcHist calculates the histogram of one or more arrays. The elements of a tuple used to increment a histogram bin are taken from the corresponding input arrays at the same location. The sample below shows how to compute a 2D Hue-Saturation histogram for a color image. : -@code - #include - #include - - using namespace cv; +@include snippets/imgproc_calcHist.cpp - int main( int argc, char** argv ) - { - Mat src, hsv; - if( argc != 2 || !(src=imread(argv[1], 1)).data ) - return -1; - - cvtColor(src, hsv, COLOR_BGR2HSV); - - // Quantize the hue to 30 levels - // and the saturation to 32 levels - int hbins = 30, sbins = 32; - int histSize[] = {hbins, sbins}; - // hue varies from 0 to 179, see cvtColor - float hranges[] = { 0, 180 }; - // saturation varies from 0 (black-gray-white) to - // 255 (pure spectrum color) - float sranges[] = { 0, 256 }; - const float* ranges[] = { hranges, sranges }; - MatND hist; - // we compute the histogram from the 0-th and 1-st channels - int channels[] = {0, 1}; - - calcHist( &hsv, 1, channels, Mat(), // do not use mask - hist, 2, histSize, ranges, - true, // the histogram is uniform - false ); - double maxVal=0; - minMaxLoc(hist, 0, &maxVal, 0, 0); - - int scale = 10; - Mat histImg = Mat::zeros(sbins*scale, hbins*10, CV_8UC3); - - for( int h = 0; h < hbins; h++ ) - for( int s = 0; s < sbins; s++ ) - { - float binVal = hist.at(h, s); - int intensity = cvRound(binVal*255/maxVal); - rectangle( histImg, Point(h*scale, s*scale), - Point( (h+1)*scale - 1, (s+1)*scale - 1), - Scalar::all(intensity), - CV_FILLED ); - } - - namedWindow( "Source", 1 ); - imshow( "Source", src ); - - namedWindow( "H-S Histogram", 1 ); - imshow( "H-S Histogram", histImg ); - waitKey(); - } -@endcode - -@param images Source arrays. They all should have the same depth, CV_8U or CV_32F , and the same +@param images Source arrays. They all should have the same depth, CV_8U, CV_16U or CV_32F , and the same size. Each of them can have an arbitrary number of channels. @param nimages Number of source images. @param channels List of the dims channels used to compute the histogram. The first array channels @@ -3030,7 +3063,7 @@ CV_EXPORTS void calcHist( const Mat* images, int nimages, /** @overload -this variant uses cv::SparseMat for output +this variant uses %SparseMat for output */ CV_EXPORTS void calcHist( const Mat* images, int nimages, const int* channels, InputArray mask, @@ -3048,8 +3081,8 @@ CV_EXPORTS_W void calcHist( InputArrayOfArrays images, /** @brief Calculates the back projection of a histogram. -The functions calcBackProject calculate the back project of the histogram. That is, similarly to -cv::calcHist , at each location (x, y) the function collects the values from the selected channels +The function cv::calcBackProject calculates the back project of the histogram. That is, similarly to +#calcHist , at each location (x, y) the function collects the values from the selected channels in the input images and finds the corresponding histogram bin. But instead of incrementing it, the function reads the bin value, scales it by scale , and stores in backProject(x,y) . In terms of statistics, the function computes probability of each element value in respect with the empirical @@ -3069,7 +3102,7 @@ component. This is an approximate algorithm of the CamShift color object tracker. -@param images Source arrays. They all should have the same depth, CV_8U or CV_32F , and the same +@param images Source arrays. They all should have the same depth, CV_8U, CV_16U or CV_32F , and the same size. Each of them can have an arbitrary number of channels. @param nimages Number of source images. @param channels The list of channels used to compute the back projection. The number of channels @@ -3079,11 +3112,11 @@ images[0].channels() + images[1].channels()-1, and so on. @param hist Input histogram that can be dense or sparse. @param backProject Destination back projection array that is a single-channel array of the same size and depth as images[0] . -@param ranges Array of arrays of the histogram bin boundaries in each dimension. See calcHist . +@param ranges Array of arrays of the histogram bin boundaries in each dimension. See #calcHist . @param scale Optional scale factor for the output back projection. @param uniform Flag indicating whether the histogram is uniform or not (see above). -@sa cv::calcHist, cv::compareHist +@sa calcHist, compareHist */ CV_EXPORTS void calcBackProject( const Mat* images, int nimages, const int* channels, InputArray hist, @@ -3104,18 +3137,18 @@ CV_EXPORTS_W void calcBackProject( InputArrayOfArrays images, const std::vector< /** @brief Compares two histograms. -The function compare two dense or two sparse histograms using the specified method. +The function cv::compareHist compares two dense or two sparse histograms using the specified method. The function returns \f$d(H_1, H_2)\f$ . While the function works well with 1-, 2-, 3-dimensional dense histograms, it may not be suitable for high-dimensional sparse histograms. In such histograms, because of aliasing and sampling problems, the coordinates of non-zero histogram bins can slightly shift. To compare such histograms -or more general sparse configurations of weighted points, consider using the cv::EMD function. +or more general sparse configurations of weighted points, consider using the #EMD function. @param H1 First compared histogram. @param H2 Second compared histogram of the same size as H1 . -@param method Comparison method, see cv::HistCompMethods +@param method Comparison method, see #HistCompMethods */ CV_EXPORTS_W double compareHist( InputArray H1, InputArray H2, int method ); @@ -3139,6 +3172,14 @@ The algorithm normalizes the brightness and increases the contrast of the image. */ CV_EXPORTS_W void equalizeHist( InputArray src, OutputArray dst ); +/** @brief Creates a smart pointer to a cv::CLAHE class and initializes it. + +@param clipLimit Threshold for contrast limiting. +@param tileGridSize Size of grid for histogram equalization. Input image will be divided into +equally sized rectangular tiles. tileGridSize defines the number of tiles in row and column. + */ +CV_EXPORTS_W Ptr createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8)); + /** @brief Computes the "minimal work" distance between two weighted point configurations. The function computes the earth mover distance and/or a lower boundary of the distance between the @@ -3152,11 +3193,13 @@ same object. @param signature1 First signature, a \f$\texttt{size1}\times \texttt{dims}+1\f$ floating-point matrix. Each row stores the point weight followed by the point coordinates. The matrix is allowed to have -a single column (weights only) if the user-defined cost matrix is used. +a single column (weights only) if the user-defined cost matrix is used. The weights must be +non-negative and have at least one non-zero value. @param signature2 Second signature of the same format as signature1 , though the number of rows may be different. The total weights may be different. In this case an extra "dummy" point is added -to either signature1 or signature2 . -@param distType Used metric. See cv::DistanceTypes. +to either signature1 or signature2. The weights must be non-negative and have at least one non-zero +value. +@param distType Used metric. See #DistanceTypes. @param cost User-defined \f$\texttt{size1}\times \texttt{size2}\f$ cost matrix. Also, if a cost matrix is used, lower boundary lowerBound cannot be calculated because it needs a metric function. @param lowerBound Optional input/output parameter: lower boundary of a distance between the two @@ -3175,11 +3218,15 @@ CV_EXPORTS float EMD( InputArray signature1, InputArray signature2, int distType, InputArray cost=noArray(), float* lowerBound = 0, OutputArray flow = noArray() ); +CV_EXPORTS_AS(EMD) float wrapperEMD( InputArray signature1, InputArray signature2, + int distType, InputArray cost=noArray(), + CV_IN_OUT Ptr lowerBound = Ptr(), OutputArray flow = noArray() ); + //! @} imgproc_hist -/** @example watershed.cpp +/** @example samples/cpp/watershed.cpp An example using the watershed algorithm - */ +*/ /** @brief Performs a marker-based image segmentation using the watershed algorithm. @@ -3189,7 +3236,7 @@ algorithm, described in @cite Meyer92 . Before passing the image to the function, you have to roughly outline the desired regions in the image markers with positive (\>0) indices. So, every region is represented as one or more connected components with the pixel values 1, 2, 3, and so on. Such markers can be retrieved from a binary -mask using findContours and drawContours (see the watershed.cpp demo). The markers are "seeds" of +mask using #findContours and #drawContours (see the watershed.cpp demo). The markers are "seeds" of the future image regions. All the other pixels in markers , whose relation to the outlined regions is not known and should be defined by the algorithm, should be set to 0's. In the function output, each pixel in markers is set to a value of the "seed" components or to -1 at boundaries between the @@ -3257,9 +3304,10 @@ CV_EXPORTS_W void pyrMeanShiftFiltering( InputArray src, OutputArray dst, //! @addtogroup imgproc_misc //! @{ -/** @example grabcut.cpp +/** @example samples/cpp/grabcut.cpp An example using the GrabCut algorithm - */ +![Sample Screenshot](grabcut_output1.jpg) +*/ /** @brief Runs the GrabCut algorithm. @@ -3267,33 +3315,32 @@ The function implements the [GrabCut image segmentation algorithm](http://en.wik @param img Input 8-bit 3-channel image. @param mask Input/output 8-bit single-channel mask. The mask is initialized by the function when -mode is set to GC_INIT_WITH_RECT. Its elements may have one of the cv::GrabCutClasses. +mode is set to #GC_INIT_WITH_RECT. Its elements may have one of the #GrabCutClasses. @param rect ROI containing a segmented object. The pixels outside of the ROI are marked as -"obvious background". The parameter is only used when mode==GC_INIT_WITH_RECT . +"obvious background". The parameter is only used when mode==#GC_INIT_WITH_RECT . @param bgdModel Temporary array for the background model. Do not modify it while you are processing the same image. @param fgdModel Temporary arrays for the foreground model. Do not modify it while you are processing the same image. @param iterCount Number of iterations the algorithm should make before returning the result. Note -that the result can be refined with further calls with mode==GC_INIT_WITH_MASK or +that the result can be refined with further calls with mode==#GC_INIT_WITH_MASK or mode==GC_EVAL . -@param mode Operation mode that could be one of the cv::GrabCutModes +@param mode Operation mode that could be one of the #GrabCutModes */ CV_EXPORTS_W void grabCut( InputArray img, InputOutputArray mask, Rect rect, InputOutputArray bgdModel, InputOutputArray fgdModel, int iterCount, int mode = GC_EVAL ); -/** @example distrans.cpp -An example on using the distance transform\ +/** @example samples/cpp/distrans.cpp +An example on using the distance transform */ - /** @brief Calculates the distance to the closest zero pixel for each pixel of the source image. -The functions distanceTransform calculate the approximate or precise distance from every binary +The function cv::distanceTransform calculates the approximate or precise distance from every binary image pixel to the nearest zero pixel. For zero image pixels, the distance will obviously be zero. -When maskSize == DIST_MASK_PRECISE and distanceType == DIST_L2 , the function runs the +When maskSize == #DIST_MASK_PRECISE and distanceType == #DIST_L2 , the function runs the algorithm described in @cite Felzenszwalb04 . This algorithm is parallelized with the TBB library. In other cases, the algorithm @cite Borgefors86 is used. This means that for a pixel the function @@ -3302,8 +3349,8 @@ diagonal, or knight's move (the latest is available for a \f$5\times 5\f$ mask). distance is calculated as a sum of these basic distances. Since the distance function should be symmetric, all of the horizontal and vertical shifts must have the same cost (denoted as a ), all the diagonal shifts must have the same cost (denoted as `b`), and all knight's moves must have the -same cost (denoted as `c`). For the cv::DIST_C and cv::DIST_L1 types, the distance is calculated -precisely, whereas for cv::DIST_L2 (Euclidean distance) the distance can be calculated only with a +same cost (denoted as `c`). For the #DIST_C and #DIST_L1 types, the distance is calculated +precisely, whereas for #DIST_L2 (Euclidean distance) the distance can be calculated only with a relative error (a \f$5\times 5\f$ mask gives more accurate results). For `a`,`b`, and `c`, OpenCV uses the values suggested in the original paper: - DIST_L1: `a = 1, b = 2` @@ -3312,21 +3359,21 @@ uses the values suggested in the original paper: - `5 x 5`: `a=1, b=1.4, c=2.1969` - DIST_C: `a = 1, b = 1` -Typically, for a fast, coarse distance estimation DIST_L2, a \f$3\times 3\f$ mask is used. For a -more accurate distance estimation DIST_L2, a \f$5\times 5\f$ mask or the precise algorithm is used. +Typically, for a fast, coarse distance estimation #DIST_L2, a \f$3\times 3\f$ mask is used. For a +more accurate distance estimation #DIST_L2, a \f$5\times 5\f$ mask or the precise algorithm is used. Note that both the precise and the approximate algorithms are linear on the number of pixels. This variant of the function does not only compute the minimum distance for each pixel \f$(x, y)\f$ but also identifies the nearest connected component consisting of zero pixels -(labelType==DIST_LABEL_CCOMP) or the nearest zero pixel (labelType==DIST_LABEL_PIXEL). Index of the -component/pixel is stored in `labels(x, y)`. When labelType==DIST_LABEL_CCOMP, the function +(labelType==#DIST_LABEL_CCOMP) or the nearest zero pixel (labelType==#DIST_LABEL_PIXEL). Index of the +component/pixel is stored in `labels(x, y)`. When labelType==#DIST_LABEL_CCOMP, the function automatically finds connected components of zero pixels in the input image and marks them with -distinct labels. When labelType==DIST_LABEL_CCOMP, the function scans through the input image and +distinct labels. When labelType==#DIST_LABEL_CCOMP, the function scans through the input image and marks all the zero pixels with distinct labels. In this mode, the complexity is still linear. That is, the function provides a very fast way to compute the Voronoi diagram for a binary image. Currently, the second variant can use only the -approximate distance transform algorithm, i.e. maskSize=DIST_MASK_PRECISE is not supported +approximate distance transform algorithm, i.e. maskSize=#DIST_MASK_PRECISE is not supported yet. @param src 8-bit, single-channel (binary) source image. @@ -3334,12 +3381,12 @@ yet. single-channel image of the same size as src. @param labels Output 2D array of labels (the discrete Voronoi diagram). It has the type CV_32SC1 and the same size as src. -@param distanceType Type of distance, see cv::DistanceTypes -@param maskSize Size of the distance transform mask, see cv::DistanceTransformMasks. -DIST_MASK_PRECISE is not supported by this variant. In case of the DIST_L1 or DIST_C distance type, +@param distanceType Type of distance, see #DistanceTypes +@param maskSize Size of the distance transform mask, see #DistanceTransformMasks. +#DIST_MASK_PRECISE is not supported by this variant. In case of the #DIST_L1 or #DIST_C distance type, the parameter is forced to 3 because a \f$3\times 3\f$ mask gives the same result as \f$5\times 5\f$ or any larger aperture. -@param labelType Type of the label array to build, see cv::DistanceTransformLabelTypes. +@param labelType Type of the label array to build, see #DistanceTransformLabelTypes. */ CV_EXPORTS_AS(distanceTransformWithLabels) void distanceTransform( InputArray src, OutputArray dst, OutputArray labels, int distanceType, int maskSize, @@ -3349,18 +3396,18 @@ CV_EXPORTS_AS(distanceTransformWithLabels) void distanceTransform( InputArray sr @param src 8-bit, single-channel (binary) source image. @param dst Output image with calculated distances. It is a 8-bit or 32-bit floating-point, single-channel image of the same size as src . -@param distanceType Type of distance, see cv::DistanceTypes -@param maskSize Size of the distance transform mask, see cv::DistanceTransformMasks. In case of the -DIST_L1 or DIST_C distance type, the parameter is forced to 3 because a \f$3\times 3\f$ mask gives +@param distanceType Type of distance, see #DistanceTypes +@param maskSize Size of the distance transform mask, see #DistanceTransformMasks. In case of the +#DIST_L1 or #DIST_C distance type, the parameter is forced to 3 because a \f$3\times 3\f$ mask gives the same result as \f$5\times 5\f$ or any larger aperture. @param dstType Type of output image. It can be CV_8U or CV_32F. Type CV_8U can be used only for -the first variant of the function and distanceType == DIST_L1. +the first variant of the function and distanceType == #DIST_L1. */ CV_EXPORTS_W void distanceTransform( InputArray src, OutputArray dst, int distanceType, int maskSize, int dstType=CV_32F); -/** @example ffilldemo.cpp - An example using the FloodFill technique +/** @example samples/cpp/ffilldemo.cpp +An example using the FloodFill technique */ /** @overload @@ -3374,7 +3421,7 @@ CV_EXPORTS int floodFill( InputOutputArray image, /** @brief Fills a connected component with the given color. -The functions floodFill fill a connected component starting from the seed point with the specified +The function cv::floodFill fills a connected component starting from the seed point with the specified color. The connectivity is determined by the color/brightness closeness of the neighbor pixels. The pixel at \f$(x,y)\f$ is considered to belong to the repainted domain if: @@ -3411,14 +3458,15 @@ Use these functions to either mark a connected component with the specified colo a mask and then extract the contour, or copy the region to another image, and so on. @param image Input/output 1- or 3-channel, 8-bit, or floating-point image. It is modified by the -function unless the FLOODFILL_MASK_ONLY flag is set in the second variant of the function. See +function unless the #FLOODFILL_MASK_ONLY flag is set in the second variant of the function. See the details below. @param mask Operation mask that should be a single-channel 8-bit image, 2 pixels wider and 2 pixels taller than image. Since this is both an input and output parameter, you must take responsibility of initializing it. Flood-filling cannot go across non-zero pixels in the input mask. For example, an edge detector output can be used as a mask to stop filling at edges. On output, pixels in the mask corresponding to filled pixels in the image are set to 1 or to the a value specified in flags -as described below. It is therefore possible to use the same mask in multiple calls to the function +as described below. Additionally, the function fills the border of the mask with ones to simplify +internal processing. It is therefore possible to use the same mask in multiple calls to the function to make sure the filled areas do not overlap. @param seedPoint Starting point. @param newVal New value of the repainted domain pixels. @@ -3435,7 +3483,7 @@ will be considered. The next 8 bits (8-16) contain a value between 1 and 255 wit the mask (the default value is 1). For example, 4 | ( 255 \<\< 8 ) will consider 4 nearest neighbours and fill the mask with a value of 255. The following additional options occupy higher bits and therefore may be further combined with the connectivity and mask fill values using -bit-wise or (|), see cv::FloodFillFlags. +bit-wise or (|), see #FloodFillFlags. @note Since the mask is larger than the filled image, a pixel \f$(x, y)\f$ in image corresponds to the pixel \f$(x+1, y+1)\f$ in the mask . @@ -3447,6 +3495,20 @@ CV_EXPORTS_W int floodFill( InputOutputArray image, InputOutputArray mask, Scalar loDiff = Scalar(), Scalar upDiff = Scalar(), int flags = 4 ); +//! Performs linear blending of two images: +//! \f[ \texttt{dst}(i,j) = \texttt{weights1}(i,j)*\texttt{src1}(i,j) + \texttt{weights2}(i,j)*\texttt{src2}(i,j) \f] +//! @param src1 It has a type of CV_8UC(n) or CV_32FC(n), where n is a positive integer. +//! @param src2 It has the same type and size as src1. +//! @param weights1 It has a type of CV_32FC1 and the same size with src1. +//! @param weights2 It has a type of CV_32FC1 and the same size with src1. +//! @param dst It is created if it does not have the same size and type with src1. +CV_EXPORTS void blendLinear(InputArray src1, InputArray src2, InputArray weights1, InputArray weights2, OutputArray dst); + +//! @} imgproc_misc + +//! @addtogroup imgproc_color_conversions +//! @{ + /** @brief Converts an image from one color space to another. The function converts an input image from one color space to another. In case of a transformation @@ -3465,13 +3527,13 @@ In case of linear transformations, the range does not matter. But in case of a n transformation, an input RGB image should be normalized to the proper value range to get the correct results, for example, for RGB \f$\rightarrow\f$ L\*u\*v\* transformation. For example, if you have a 32-bit floating-point image directly converted from an 8-bit image without any scaling, then it will -have the 0..255 value range instead of 0..1 assumed by the function. So, before calling cvtColor , +have the 0..255 value range instead of 0..1 assumed by the function. So, before calling #cvtColor , you need first to scale the image down: @code img *= 1./255; cvtColor(img, img, COLOR_BGR2Luv); @endcode -If you use cvtColor with 8-bit images, the conversion will have some information lost. For many +If you use #cvtColor with 8-bit images, the conversion will have some information lost. For many applications, this will not be noticeable but it is recommended to use 32-bit images in applications that need the full range of colors or that convert an image before an operation and then convert back. @@ -3482,7 +3544,7 @@ range: 255 for CV_8U, 65535 for CV_16U, 1 for CV_32F. @param src input image: 8-bit unsigned, 16-bit unsigned ( CV_16UC... ), or single-precision floating-point. @param dst output image of the same size and depth as src. -@param code color space conversion code (see cv::ColorConversionCodes). +@param code color space conversion code (see #ColorConversionCodes). @param dstCn number of channels in the destination image; if the parameter is 0, the number of the channels is derived automatically from src and code. @@ -3490,10 +3552,59 @@ channels is derived automatically from src and code. */ CV_EXPORTS_W void cvtColor( InputArray src, OutputArray dst, int code, int dstCn = 0 ); -//! @} imgproc_misc +/** @brief Converts an image from one color space to another where the source image is +stored in two planes. + +This function only supports YUV420 to RGB conversion as of now. + +@param src1: 8-bit image (#CV_8U) of the Y plane. +@param src2: image containing interleaved U/V plane. +@param dst: output image. +@param code: Specifies the type of conversion. It can take any of the following values: +- #COLOR_YUV2BGR_NV12 +- #COLOR_YUV2RGB_NV12 +- #COLOR_YUV2BGRA_NV12 +- #COLOR_YUV2RGBA_NV12 +- #COLOR_YUV2BGR_NV21 +- #COLOR_YUV2RGB_NV21 +- #COLOR_YUV2BGRA_NV21 +- #COLOR_YUV2RGBA_NV21 +*/ +CV_EXPORTS_W void cvtColorTwoPlane( InputArray src1, InputArray src2, OutputArray dst, int code ); + +/** @brief main function for all demosaicing processes + +@param src input image: 8-bit unsigned or 16-bit unsigned. +@param dst output image of the same size and depth as src. +@param code Color space conversion code (see the description below). +@param dstCn number of channels in the destination image; if the parameter is 0, the number of the +channels is derived automatically from src and code. + +The function can do the following transformations: + +- Demosaicing using bilinear interpolation + + #COLOR_BayerBG2BGR , #COLOR_BayerGB2BGR , #COLOR_BayerRG2BGR , #COLOR_BayerGR2BGR + + #COLOR_BayerBG2GRAY , #COLOR_BayerGB2GRAY , #COLOR_BayerRG2GRAY , #COLOR_BayerGR2GRAY + +- Demosaicing using Variable Number of Gradients. + + #COLOR_BayerBG2BGR_VNG , #COLOR_BayerGB2BGR_VNG , #COLOR_BayerRG2BGR_VNG , #COLOR_BayerGR2BGR_VNG + +- Edge-Aware Demosaicing. + + #COLOR_BayerBG2BGR_EA , #COLOR_BayerGB2BGR_EA , #COLOR_BayerRG2BGR_EA , #COLOR_BayerGR2BGR_EA + +- Demosaicing with alpha channel + + #COLOR_BayerBG2BGRA , #COLOR_BayerGB2BGRA , #COLOR_BayerRG2BGRA , #COLOR_BayerGR2BGRA + +@sa cvtColor +*/ +CV_EXPORTS_W void demosaicing(InputArray src, OutputArray dst, int code, int dstCn = 0); -// main function for all demosaicing procceses -CV_EXPORTS_W void demosaicing(InputArray _src, OutputArray _dst, int code, int dcn = 0); +//! @} imgproc_color_conversions //! @addtogroup imgproc_shape //! @{ @@ -3509,6 +3620,9 @@ results are returned in the structure cv::Moments. used for images only. @returns moments. +@note Only applicable to contour moments calculations from Python bindings: Note that the numpy +type for the input array should be either np.int32 or np.float32. + @sa contourArea, arcLength */ CV_EXPORTS_W Moments moments( InputArray array, bool binaryImage = false ); @@ -3554,6 +3668,10 @@ enum TemplateMatchModes { TM_CCOEFF_NORMED = 5 //!< \f[R(x,y)= \frac{ \sum_{x',y'} (T'(x',y') \cdot I'(x+x',y+y')) }{ \sqrt{\sum_{x',y'}T'(x',y')^2 \cdot \sum_{x',y'} I'(x+x',y+y')^2} }\f] }; +/** @example samples/cpp/tutorial_code/Histograms_Matching/MatchTemplate_Demo.cpp +An example using Template Matching algorithm +*/ + /** @brief Compares a template against overlapped image regions. The function slides through image , compares the overlapped patches of size \f$w \times h\f$ against @@ -3562,8 +3680,8 @@ for the available comparison methods ( \f$I\f$ denotes image, \f$T\f$ template, is done over template and/or the image patch: \f$x' = 0...w-1, y' = 0...h-1\f$ After the function finishes the comparison, the best matches can be found as global minimums (when -TM_SQDIFF was used) or maximums (when TM_CCORR or TM_CCOEFF was used) using the -minMaxLoc function. In case of a color image, template summation in the numerator and each sum in +#TM_SQDIFF was used) or maximums (when #TM_CCORR or #TM_CCOEFF was used) using the +#minMaxLoc function. In case of a color image, template summation in the numerator and each sum in the denominator is done over all of the channels and separate mean values are used for each channel. That is, the function can take a color template and a color image. The result will still be a single-channel image, which is easier to analyze. @@ -3573,9 +3691,9 @@ single-channel image, which is easier to analyze. data type. @param result Map of comparison results. It must be single-channel 32-bit floating-point. If image is \f$W \times H\f$ and templ is \f$w \times h\f$ , then result is \f$(W-w+1) \times (H-h+1)\f$ . -@param method Parameter specifying the comparison method, see cv::TemplateMatchModes +@param method Parameter specifying the comparison method, see #TemplateMatchModes @param mask Mask of searched template. It must have the same datatype and size with templ. It is -not set by default. +not set by default. Currently, only the #TM_SQDIFF and #TM_CCORR_NORMED methods are supported. */ CV_EXPORTS_W void matchTemplate( InputArray image, InputArray templ, OutputArray result, int method, InputArray mask = noArray() ); @@ -3585,27 +3703,74 @@ CV_EXPORTS_W void matchTemplate( InputArray image, InputArray templ, //! @addtogroup imgproc_shape //! @{ +/** @example samples/cpp/connected_components.cpp +This program demonstrates connected components and use of the trackbar +*/ + /** @brief computes the connected components labeled image of boolean image image with 4 or 8 way connectivity - returns N, the total number of labels [0, N-1] where 0 represents the background label. ltype specifies the output label image type, an important consideration based on the total number of labels or alternatively the total number of pixels in -the source image. +the source image. ccltype specifies the connected components labeling algorithm to use, currently +Grana (BBDT) and Wu's (SAUF) algorithms are supported, see the #ConnectedComponentsAlgorithmsTypes +for details. Note that SAUF algorithm forces a row major ordering of labels while BBDT does not. +This function uses parallel version of both Grana and Wu's algorithms if at least one allowed +parallel framework is enabled and if the rows of the image are at least twice the number returned by #getNumberOfCPUs. @param image the 8-bit single-channel image to be labeled @param labels destination labeled image @param connectivity 8 or 4 for 8-way or 4-way connectivity respectively @param ltype output image label type. Currently CV_32S and CV_16U are supported. - */ +@param ccltype connected components algorithm type (see the #ConnectedComponentsAlgorithmsTypes). +*/ +CV_EXPORTS_AS(connectedComponentsWithAlgorithm) int connectedComponents(InputArray image, OutputArray labels, + int connectivity, int ltype, int ccltype); + + +/** @overload + +@param image the 8-bit single-channel image to be labeled +@param labels destination labeled image +@param connectivity 8 or 4 for 8-way or 4-way connectivity respectively +@param ltype output image label type. Currently CV_32S and CV_16U are supported. +*/ CV_EXPORTS_W int connectedComponents(InputArray image, OutputArray labels, int connectivity = 8, int ltype = CV_32S); + +/** @brief computes the connected components labeled image of boolean image and also produces a statistics output for each label + +image with 4 or 8 way connectivity - returns N, the total number of labels [0, N-1] where 0 +represents the background label. ltype specifies the output label image type, an important +consideration based on the total number of labels or alternatively the total number of pixels in +the source image. ccltype specifies the connected components labeling algorithm to use, currently +Grana's (BBDT) and Wu's (SAUF) algorithms are supported, see the #ConnectedComponentsAlgorithmsTypes +for details. Note that SAUF algorithm forces a row major ordering of labels while BBDT does not. +This function uses parallel version of both Grana and Wu's algorithms (statistics included) if at least one allowed +parallel framework is enabled and if the rows of the image are at least twice the number returned by #getNumberOfCPUs. + +@param image the 8-bit single-channel image to be labeled +@param labels destination labeled image +@param stats statistics output for each label, including the background label, see below for +available statistics. Statistics are accessed via stats(label, COLUMN) where COLUMN is one of +#ConnectedComponentsTypes. The data type is CV_32S. +@param centroids centroid output for each label, including the background label. Centroids are +accessed via centroids(label, 0) for x and centroids(label, 1) for y. The data type CV_64F. +@param connectivity 8 or 4 for 8-way or 4-way connectivity respectively +@param ltype output image label type. Currently CV_32S and CV_16U are supported. +@param ccltype connected components algorithm type (see #ConnectedComponentsAlgorithmsTypes). +*/ +CV_EXPORTS_AS(connectedComponentsWithStatsWithAlgorithm) int connectedComponentsWithStats(InputArray image, OutputArray labels, + OutputArray stats, OutputArray centroids, + int connectivity, int ltype, int ccltype); + /** @overload @param image the 8-bit single-channel image to be labeled @param labels destination labeled image @param stats statistics output for each label, including the background label, see below for available statistics. Statistics are accessed via stats(label, COLUMN) where COLUMN is one of -cv::ConnectedComponentsTypes. The data type is CV_32S. +#ConnectedComponentsTypes. The data type is CV_32S. @param centroids centroid output for each label, including the background label. Centroids are accessed via centroids(label, 0) for x and centroids(label, 1) for y. The data type CV_64F. @param connectivity 8 or 4 for 8-way or 4-way connectivity respectively @@ -3619,42 +3784,49 @@ CV_EXPORTS_W int connectedComponentsWithStats(InputArray image, OutputArray labe /** @brief Finds contours in a binary image. The function retrieves contours from the binary image using the algorithm @cite Suzuki85 . The contours -are a useful tool for shape analysis and object detection and recognition. See squares.c in the +are a useful tool for shape analysis and object detection and recognition. See squares.cpp in the OpenCV sample directory. - -@note Source image is modified by this function. Also, the function does not take into account -1-pixel border of the image (it's filled with 0's and used for neighbor analysis in the algorithm), -therefore the contours touching the image border will be clipped. +@note Since opencv 3.2 source image is not modified by this function. @param image Source, an 8-bit single-channel image. Non-zero pixels are treated as 1's. Zero -pixels remain 0's, so the image is treated as binary . You can use compare , inRange , threshold , -adaptiveThreshold , Canny , and others to create a binary image out of a grayscale or color one. -The function modifies the image while extracting the contours. If mode equals to RETR_CCOMP -or RETR_FLOODFILL, the input can also be a 32-bit integer image of labels (CV_32SC1). -@param contours Detected contours. Each contour is stored as a vector of points. -@param hierarchy Optional output vector, containing information about the image topology. It has -as many elements as the number of contours. For each i-th contour contours[i] , the elements -hierarchy[i][0] , hiearchy[i][1] , hiearchy[i][2] , and hiearchy[i][3] are set to 0-based indices +pixels remain 0's, so the image is treated as binary . You can use #compare, #inRange, #threshold , +#adaptiveThreshold, #Canny, and others to create a binary image out of a grayscale or color one. +If mode equals to #RETR_CCOMP or #RETR_FLOODFILL, the input can also be a 32-bit integer image of labels (CV_32SC1). +@param contours Detected contours. Each contour is stored as a vector of points (e.g. +std::vector >). +@param hierarchy Optional output vector (e.g. std::vector), containing information about the image topology. It has +as many elements as the number of contours. For each i-th contour contours[i], the elements +hierarchy[i][0] , hierarchy[i][1] , hierarchy[i][2] , and hierarchy[i][3] are set to 0-based indices in contours of the next and previous contours at the same hierarchical level, the first child contour and the parent contour, respectively. If for the contour i there are no next, previous, parent, or nested contours, the corresponding elements of hierarchy[i] will be negative. -@param mode Contour retrieval mode, see cv::RetrievalModes -@param method Contour approximation method, see cv::ContourApproximationModes +@param mode Contour retrieval mode, see #RetrievalModes +@param method Contour approximation method, see #ContourApproximationModes @param offset Optional offset by which every contour point is shifted. This is useful if the contours are extracted from the image ROI and then they should be analyzed in the whole image context. */ -CV_EXPORTS_W void findContours( InputOutputArray image, OutputArrayOfArrays contours, +CV_EXPORTS_W void findContours( InputArray image, OutputArrayOfArrays contours, OutputArray hierarchy, int mode, int method, Point offset = Point()); /** @overload */ -CV_EXPORTS void findContours( InputOutputArray image, OutputArrayOfArrays contours, +CV_EXPORTS void findContours( InputArray image, OutputArrayOfArrays contours, int mode, int method, Point offset = Point()); +/** @example samples/cpp/squares.cpp +A program using pyramid scaling, Canny, contours and contour simplification to find +squares in a list of images (pic1-6.png). Returns sequence of squares detected on the image. +*/ + +/** @example samples/tapi/squares.cpp +A program using pyramid scaling, Canny, contours and contour simplification to find +squares in the input image. +*/ + /** @brief Approximates a polygonal curve(s) with the specified precision. -The functions approxPolyDP approximate a curve or a polygon with another curve/polygon with less +The function cv::approxPolyDP approximates a curve or a polygon with another curve/polygon with less vertices so that the distance between them is less or equal to the specified precision. It uses the Douglas-Peucker algorithm @@ -3678,19 +3850,20 @@ The function computes a curve length or a closed contour perimeter. */ CV_EXPORTS_W double arcLength( InputArray curve, bool closed ); -/** @brief Calculates the up-right bounding rectangle of a point set. +/** @brief Calculates the up-right bounding rectangle of a point set or non-zero pixels of gray-scale image. -The function calculates and returns the minimal up-right bounding rectangle for the specified point set. +The function calculates and returns the minimal up-right bounding rectangle for the specified point set or +non-zero pixels of gray-scale image. -@param points Input 2D point set, stored in std::vector or Mat. +@param array Input gray-scale image or 2D point set, stored in std::vector or Mat. */ -CV_EXPORTS_W Rect boundingRect( InputArray points ); +CV_EXPORTS_W Rect boundingRect( InputArray array ); /** @brief Calculates a contour area. The function computes a contour area. Similarly to moments , the area is computed using the Green formula. Thus, the returned area and the number of non-zero pixels, if you draw the contour using -drawContours or fillPoly , can be different. Also, the function will most certainly give a wrong +#drawContours or #fillPoly , can be different. Also, the function will most certainly give a wrong results for contours with self-intersections. Example: @@ -3721,9 +3894,8 @@ CV_EXPORTS_W double contourArea( InputArray contour, bool oriented = false ); /** @brief Finds a rotated rectangle of the minimum area enclosing the input 2D point set. The function calculates and returns the minimum-area bounding rectangle (possibly rotated) for a -specified point set. See the OpenCV sample minarea.cpp . Developer should keep in mind that the -returned rotatedRect can contain negative indices when data is close to the containing Mat element -boundary. +specified point set. Developer should keep in mind that the returned RotatedRect can contain negative +indices when data is close to the containing Mat element boundary. @param points Input vector of 2D points, stored in std::vector\<\> or Mat */ @@ -3732,10 +3904,8 @@ CV_EXPORTS_W RotatedRect minAreaRect( InputArray points ); /** @brief Finds the four vertices of a rotated rect. Useful to draw the rotated rectangle. The function finds the four vertices of a rotated rectangle. This function is useful to draw the -rectangle. In C++, instead of using this function, you can directly use box.points() method. Please -visit the [tutorial on bounding -rectangle](http://docs.opencv.org/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.html#bounding-rects-circles) -for more information. +rectangle. In C++, instead of using this function, you can directly use RotatedRect::points method. Please +visit the @ref tutorial_bounding_rotated_ellipses "tutorial on Creating Bounding rotated boxes and ellipses for contours" for more information. @param box The input rotated rectangle. It may be the output of @param points The output array of four vertices of rectangles. @@ -3744,8 +3914,7 @@ CV_EXPORTS_W void boxPoints(RotatedRect box, OutputArray points); /** @brief Finds a circle of the minimum area enclosing a 2D point set. -The function finds the minimal enclosing circle of a 2D point set using an iterative algorithm. See -the OpenCV sample minarea.cpp . +The function finds the minimal enclosing circle of a 2D point set using an iterative algorithm. @param points Input vector of 2D points, stored in std::vector\<\> or Mat @param center Output center of the circle. @@ -3754,8 +3923,8 @@ the OpenCV sample minarea.cpp . CV_EXPORTS_W void minEnclosingCircle( InputArray points, CV_OUT Point2f& center, CV_OUT float& radius ); -/** @example minarea.cpp - */ +/** @example samples/cpp/minarea.cpp +*/ /** @brief Finds a triangle of minimum area enclosing a 2D point set and returns its area. @@ -3767,9 +3936,9 @@ area. The output for a given 2D point set is shown in the image below. 2D points The implementation of the algorithm is based on O'Rourke's @cite ORourke86 and Klee and Laskowski's @cite KleeLaskowski85 papers. O'Rourke provides a \f$\theta(n)\f$ algorithm for finding the minimal -enclosing triangle of a 2D convex polygon with n vertices. Since the minEnclosingTriangle function +enclosing triangle of a 2D convex polygon with n vertices. Since the #minEnclosingTriangle function takes a 2D point set as input an additional preprocessing step of computing the convex hull of the -2D point set is required. The complexity of the convexHull function is \f$O(n log(n))\f$ which is higher +2D point set is required. The complexity of the #convexHull function is \f$O(n log(n))\f$ which is higher than \f$\theta(n)\f$. Thus the overall complexity of the function is \f$O(n log(n))\f$. @param points Input vector of 2D points with depth CV_32S or CV_32F, stored in std::vector\<\> or Mat @@ -3780,25 +3949,24 @@ CV_EXPORTS_W double minEnclosingTriangle( InputArray points, CV_OUT OutputArray /** @brief Compares two shapes. -The function compares two shapes. All three implemented methods use the Hu invariants (see cv::HuMoments) +The function compares two shapes. All three implemented methods use the Hu invariants (see #HuMoments) @param contour1 First contour or grayscale image. @param contour2 Second contour or grayscale image. -@param method Comparison method, see ::ShapeMatchModes +@param method Comparison method, see #ShapeMatchModes @param parameter Method-specific parameter (not supported now). */ CV_EXPORTS_W double matchShapes( InputArray contour1, InputArray contour2, int method, double parameter ); -/** @example convexhull.cpp +/** @example samples/cpp/convexhull.cpp An example using the convexHull functionality */ /** @brief Finds the convex hull of a point set. -The functions find the convex hull of a 2D point set using the Sklansky's algorithm @cite Sklansky82 -that has *O(N logN)* complexity in the current implementation. See the OpenCV sample convexhull.cpp -that demonstrates the usage of different function variants. +The function cv::convexHull finds the convex hull of a 2D point set using the Sklansky's algorithm @cite Sklansky82 +that has *O(N logN)* complexity in the current implementation. @param points Input 2D point set, stored in std::vector or Mat. @param hull Output convex hull. It is either an integer vector of indices or vector of points. In @@ -3811,8 +3979,16 @@ to the right, and its Y axis pointing upwards. @param returnPoints Operation flag. In case of a matrix, when the flag is true, the function returns convex hull points. Otherwise, it returns indices of the convex hull points. When the output array is std::vector, the flag is ignored, and the output depends on the type of the -vector: std::vector\ implies returnPoints=true, std::vector\ implies -returnPoints=false. +vector: std::vector\ implies returnPoints=false, std::vector\ implies +returnPoints=true. + +@note `points` and `hull` should be different arrays, inplace processing isn't supported. + +Check @ref tutorial_hull "the corresponding tutorial" for more details. + +useful links: + +https://www.learnopencv.com/convex-hull-using-opencv-in-python-and-c/ */ CV_EXPORTS_W void convexHull( InputArray points, OutputArray hull, bool clockwise = false, bool returnPoints = true ); @@ -3827,7 +4003,7 @@ The figure below displays convexity defects of a hand contour: @param convexhull Convex hull obtained using convexHull that should contain indices of the contour points that make the hull. @param convexityDefects The output vector of convexity defects. In C++ and the new Python/Java -interface each convexity defect is represented as 4-element integer vector (a.k.a. cv::Vec4i): +interface each convexity defect is represented as 4-element integer vector (a.k.a. #Vec4i): (start_index, end_index, farthest_pt_index, fixpt_depth), where indices are 0-based indices in the original contour of the convexity defect beginning, end and the farthest point, and fixpt_depth is fixed-point approximation (with 8 fractional bits) of the distance between the @@ -3845,12 +4021,28 @@ without self-intersections. Otherwise, the function output is undefined. */ CV_EXPORTS_W bool isContourConvex( InputArray contour ); -//! finds intersection of two convex polygons +/** @example samples/cpp/intersectExample.cpp +Examples of how intersectConvexConvex works +*/ + +/** @brief Finds intersection of two convex polygons + +@param _p1 First polygon +@param _p2 Second polygon +@param _p12 Output polygon describing the intersecting area +@param handleNested When true, an intersection is found if one of the polygons is fully enclosed in the other. +When false, no intersection is found. If the polygons share a side or the vertex of one polygon lies on an edge +of the other, they are not considered nested and an intersection will be found regardless of the value of handleNested. + +@returns Absolute value of area of intersecting polygon + +@note intersectConvexConvex doesn't confirm that both polygons are convex and will return invalid results if they aren't. + */ CV_EXPORTS_W float intersectConvexConvex( InputArray _p1, InputArray _p2, OutputArray _p12, bool handleNested = true ); -/** @example fitellipse.cpp - An example using the fitEllipse technique +/** @example samples/cpp/fitellipse.cpp +An example using the fitEllipse technique */ /** @brief Fits an ellipse around a set of 2D points. @@ -3865,6 +4057,88 @@ border of the containing Mat element. */ CV_EXPORTS_W RotatedRect fitEllipse( InputArray points ); +/** @brief Fits an ellipse around a set of 2D points. + + The function calculates the ellipse that fits a set of 2D points. + It returns the rotated rectangle in which the ellipse is inscribed. + The Approximate Mean Square (AMS) proposed by @cite Taubin1991 is used. + + For an ellipse, this basis set is \f$ \chi= \left(x^2, x y, y^2, x, y, 1\right) \f$, + which is a set of six free coefficients \f$ A^T=\left\{A_{\text{xx}},A_{\text{xy}},A_{\text{yy}},A_x,A_y,A_0\right\} \f$. + However, to specify an ellipse, all that is needed is five numbers; the major and minor axes lengths \f$ (a,b) \f$, + the position \f$ (x_0,y_0) \f$, and the orientation \f$ \theta \f$. This is because the basis set includes lines, + quadratics, parabolic and hyperbolic functions as well as elliptical functions as possible fits. + If the fit is found to be a parabolic or hyperbolic function then the standard #fitEllipse method is used. + The AMS method restricts the fit to parabolic, hyperbolic and elliptical curves + by imposing the condition that \f$ A^T ( D_x^T D_x + D_y^T D_y) A = 1 \f$ where + the matrices \f$ Dx \f$ and \f$ Dy \f$ are the partial derivatives of the design matrix \f$ D \f$ with + respect to x and y. The matrices are formed row by row applying the following to + each of the points in the set: + \f{align*}{ + D(i,:)&=\left\{x_i^2, x_i y_i, y_i^2, x_i, y_i, 1\right\} & + D_x(i,:)&=\left\{2 x_i,y_i,0,1,0,0\right\} & + D_y(i,:)&=\left\{0,x_i,2 y_i,0,1,0\right\} + \f} + The AMS method minimizes the cost function + \f{equation*}{ + \epsilon ^2=\frac{ A^T D^T D A }{ A^T (D_x^T D_x + D_y^T D_y) A^T } + \f} + + The minimum cost is found by solving the generalized eigenvalue problem. + + \f{equation*}{ + D^T D A = \lambda \left( D_x^T D_x + D_y^T D_y\right) A + \f} + + @param points Input 2D point set, stored in std::vector\<\> or Mat + */ +CV_EXPORTS_W RotatedRect fitEllipseAMS( InputArray points ); + + +/** @brief Fits an ellipse around a set of 2D points. + + The function calculates the ellipse that fits a set of 2D points. + It returns the rotated rectangle in which the ellipse is inscribed. + The Direct least square (Direct) method by @cite Fitzgibbon1999 is used. + + For an ellipse, this basis set is \f$ \chi= \left(x^2, x y, y^2, x, y, 1\right) \f$, + which is a set of six free coefficients \f$ A^T=\left\{A_{\text{xx}},A_{\text{xy}},A_{\text{yy}},A_x,A_y,A_0\right\} \f$. + However, to specify an ellipse, all that is needed is five numbers; the major and minor axes lengths \f$ (a,b) \f$, + the position \f$ (x_0,y_0) \f$, and the orientation \f$ \theta \f$. This is because the basis set includes lines, + quadratics, parabolic and hyperbolic functions as well as elliptical functions as possible fits. + The Direct method confines the fit to ellipses by ensuring that \f$ 4 A_{xx} A_{yy}- A_{xy}^2 > 0 \f$. + The condition imposed is that \f$ 4 A_{xx} A_{yy}- A_{xy}^2=1 \f$ which satisfies the inequality + and as the coefficients can be arbitrarily scaled is not overly restrictive. + + \f{equation*}{ + \epsilon ^2= A^T D^T D A \quad \text{with} \quad A^T C A =1 \quad \text{and} \quad C=\left(\begin{matrix} + 0 & 0 & 2 & 0 & 0 & 0 \\ + 0 & -1 & 0 & 0 & 0 & 0 \\ + 2 & 0 & 0 & 0 & 0 & 0 \\ + 0 & 0 & 0 & 0 & 0 & 0 \\ + 0 & 0 & 0 & 0 & 0 & 0 \\ + 0 & 0 & 0 & 0 & 0 & 0 + \end{matrix} \right) + \f} + + The minimum cost is found by solving the generalized eigenvalue problem. + + \f{equation*}{ + D^T D A = \lambda \left( C\right) A + \f} + + The system produces only one positive eigenvalue \f$ \lambda\f$ which is chosen as the solution + with its eigenvector \f$\mathbf{u}\f$. These are used to find the coefficients + + \f{equation*}{ + A = \sqrt{\frac{1}{\mathbf{u}^T C \mathbf{u}}} \mathbf{u} + \f} + The scaling factor guarantees that \f$A^T C A =1\f$. + + @param points Input 2D point set, stored in std::vector\<\> or Mat + */ +CV_EXPORTS_W RotatedRect fitEllipseDirect( InputArray points ); + /** @brief Fits a line to a 2D or 3D point set. The function fitLine fits a line to a 2D or 3D point set by minimizing \f$\sum_i \rho(r_i)\f$ where @@ -3893,7 +4167,7 @@ weights \f$w_i\f$ are adjusted to be inversely proportional to \f$\rho(r_i)\f$ . (x0, y0) is a point on the line. In case of 3D fitting, it should be a vector of 6 elements (like Vec6f) - (vx, vy, vz, x0, y0, z0), where (vx, vy, vz) is a normalized vector collinear to the line and (x0, y0, z0) is a point on the line. -@param distType Distance used by the M-estimator, see cv::DistanceTypes +@param distType Distance used by the M-estimator, see #DistanceTypes @param param Numerical parameter ( C ) for some types of distances. If it is 0, an optimal value is chosen. @param reps Sufficient accuracy for the radius (distance between the coordinate origin and the line). @@ -3922,7 +4196,7 @@ CV_EXPORTS_W double pointPolygonTest( InputArray contour, Point2f pt, bool measu /** @brief Finds out if there is any intersection between two rotated rectangles. -If there is then the vertices of the interesecting region are returned as well. +If there is then the vertices of the intersecting region are returned as well. Below are some examples of intersection configurations. The hatched pattern indicates the intersecting region and the red vertices are returned by the function. @@ -3931,26 +4205,21 @@ intersecting region and the red vertices are returned by the function. @param rect1 First rectangle @param rect2 Second rectangle -@param intersectingRegion The output array of the verticies of the intersecting region. It returns +@param intersectingRegion The output array of the vertices of the intersecting region. It returns at most 8 vertices. Stored as std::vector\ or cv::Mat as Mx1 of type CV_32FC2. -@returns One of cv::RectanglesIntersectTypes +@returns One of #RectanglesIntersectTypes */ CV_EXPORTS_W int rotatedRectangleIntersection( const RotatedRect& rect1, const RotatedRect& rect2, OutputArray intersectingRegion ); -//! @} imgproc_shape - -CV_EXPORTS_W Ptr createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8)); - -//! Ballard, D.H. (1981). Generalizing the Hough transform to detect arbitrary shapes. Pattern Recognition 13 (2): 111-122. -//! Detects position only without traslation and rotation -CV_EXPORTS Ptr createGeneralizedHoughBallard(); +/** @brief Creates a smart pointer to a cv::GeneralizedHoughBallard class and initializes it. +*/ +CV_EXPORTS_W Ptr createGeneralizedHoughBallard(); -//! Guil, N., González-Linares, J.M. and Zapata, E.L. (1999). Bidimensional shape detection using an invariant approach. Pattern Recognition 32 (6): 1025-1038. -//! Detects position, traslation and rotation -CV_EXPORTS Ptr createGeneralizedHoughGuil(); +/** @brief Creates a smart pointer to a cv::GeneralizedHoughGuil class and initializes it. +*/ +CV_EXPORTS_W Ptr createGeneralizedHoughGuil(); -//! Performs linear blending of two images -CV_EXPORTS void blendLinear(InputArray src1, InputArray src2, InputArray weights1, InputArray weights2, OutputArray dst); +//! @} imgproc_shape //! @addtogroup imgproc_colormap //! @{ @@ -3970,22 +4239,46 @@ enum ColormapTypes COLORMAP_HSV = 9, //!< ![HSV](pics/colormaps/colorscale_hsv.jpg) COLORMAP_PINK = 10, //!< ![pink](pics/colormaps/colorscale_pink.jpg) COLORMAP_HOT = 11, //!< ![hot](pics/colormaps/colorscale_hot.jpg) - COLORMAP_PARULA = 12 //!< ![parula](pics/colormaps/colorscale_parula.jpg) + COLORMAP_PARULA = 12, //!< ![parula](pics/colormaps/colorscale_parula.jpg) + COLORMAP_MAGMA = 13, //!< ![magma](pics/colormaps/colorscale_magma.jpg) + COLORMAP_INFERNO = 14, //!< ![inferno](pics/colormaps/colorscale_inferno.jpg) + COLORMAP_PLASMA = 15, //!< ![plasma](pics/colormaps/colorscale_plasma.jpg) + COLORMAP_VIRIDIS = 16, //!< ![viridis](pics/colormaps/colorscale_viridis.jpg) + COLORMAP_CIVIDIS = 17, //!< ![cividis](pics/colormaps/colorscale_cividis.jpg) + COLORMAP_TWILIGHT = 18, //!< ![twilight](pics/colormaps/colorscale_twilight.jpg) + COLORMAP_TWILIGHT_SHIFTED = 19, //!< ![twilight shifted](pics/colormaps/colorscale_twilight_shifted.jpg) + COLORMAP_TURBO = 20 //!< ![turbo](pics/colormaps/colorscale_turbo.jpg) }; +/** @example samples/cpp/falsecolor.cpp +An example using applyColorMap function +*/ + /** @brief Applies a GNU Octave/MATLAB equivalent colormap on a given image. -@param src The source image, grayscale or colored does not matter. +@param src The source image, grayscale or colored of type CV_8UC1 or CV_8UC3. @param dst The result is the colormapped source image. Note: Mat::create is called on dst. -@param colormap The colormap to apply, see cv::ColormapTypes - */ +@param colormap The colormap to apply, see #ColormapTypes +*/ CV_EXPORTS_W void applyColorMap(InputArray src, OutputArray dst, int colormap); +/** @brief Applies a user colormap on a given image. + +@param src The source image, grayscale or colored of type CV_8UC1 or CV_8UC3. +@param dst The result is the colormapped source image. Note: Mat::create is called on dst. +@param userColor The colormap to apply of type CV_8UC1 or CV_8UC3 and size 256 +*/ +CV_EXPORTS_W void applyColorMap(InputArray src, OutputArray dst, InputArray userColor); + //! @} imgproc_colormap //! @addtogroup imgproc_draw //! @{ + +/** OpenCV color channel order is BGR[A] */ +#define CV_RGB(r, g, b) cv::Scalar((b), (g), (r), 0) + /** @brief Draws a line segment connecting two points. The function line draws the line segment between pt1 and pt2 points in the image. The line is @@ -3998,7 +4291,7 @@ lines are drawn using Gaussian filtering. @param pt2 Second point of the line segment. @param color Line color. @param thickness Line thickness. -@param lineType Type of the line, see cv::LineTypes. +@param lineType Type of the line. See #LineTypes. @param shift Number of fractional bits in the point coordinates. */ CV_EXPORTS_W void line(InputOutputArray img, Point pt1, Point pt2, const Scalar& color, @@ -4006,14 +4299,14 @@ CV_EXPORTS_W void line(InputOutputArray img, Point pt1, Point pt2, const Scalar& /** @brief Draws a arrow segment pointing from the first point to the second one. -The function arrowedLine draws an arrow between pt1 and pt2 points in the image. See also cv::line. +The function cv::arrowedLine draws an arrow between pt1 and pt2 points in the image. See also #line. @param img Image. @param pt1 The point the arrow starts from. @param pt2 The point the arrow points to. @param color Line color. @param thickness Line thickness. -@param line_type Type of the line, see cv::LineTypes +@param line_type Type of the line. See #LineTypes @param shift Number of fractional bits in the point coordinates. @param tipLength The length of the arrow tip in relation to the arrow length */ @@ -4022,16 +4315,16 @@ CV_EXPORTS_W void arrowedLine(InputOutputArray img, Point pt1, Point pt2, const /** @brief Draws a simple, thick, or filled up-right rectangle. -The function rectangle draws a rectangle outline or a filled rectangle whose two opposite corners +The function cv::rectangle draws a rectangle outline or a filled rectangle whose two opposite corners are pt1 and pt2. @param img Image. @param pt1 Vertex of the rectangle. @param pt2 Vertex of the rectangle opposite to pt1 . @param color Rectangle color or brightness (grayscale image). -@param thickness Thickness of lines that make up the rectangle. Negative values, like CV_FILLED , +@param thickness Thickness of lines that make up the rectangle. Negative values, like #FILLED, mean that the function has to draw a filled rectangle. -@param lineType Type of the line. See the line description. +@param lineType Type of the line. See #LineTypes @param shift Number of fractional bits in the point coordinates. */ CV_EXPORTS_W void rectangle(InputOutputArray img, Point pt1, Point pt2, @@ -4043,20 +4336,24 @@ CV_EXPORTS_W void rectangle(InputOutputArray img, Point pt1, Point pt2, use `rec` parameter as alternative specification of the drawn rectangle: `r.tl() and r.br()-Point(1,1)` are opposite corners */ -CV_EXPORTS void rectangle(CV_IN_OUT Mat& img, Rect rec, +CV_EXPORTS_W void rectangle(InputOutputArray img, Rect rec, const Scalar& color, int thickness = 1, int lineType = LINE_8, int shift = 0); +/** @example samples/cpp/tutorial_code/ImgProc/basic_drawing/Drawing_2.cpp +An example using drawing functions +*/ + /** @brief Draws a circle. -The function circle draws a simple or filled circle with a given center and radius. +The function cv::circle draws a simple or filled circle with a given center and radius. @param img Image where the circle is drawn. @param center Center of the circle. @param radius Radius of the circle. @param color Circle color. -@param thickness Thickness of the circle outline, if positive. Negative thickness means that a -filled circle is to be drawn. -@param lineType Type of the circle boundary. See the line description. +@param thickness Thickness of the circle outline, if positive. Negative values, like #FILLED, +mean that a filled circle is to be drawn. +@param lineType Type of the circle boundary. See #LineTypes @param shift Number of fractional bits in the coordinates of the center and in the radius value. */ CV_EXPORTS_W void circle(InputOutputArray img, Point center, int radius, @@ -4065,14 +4362,16 @@ CV_EXPORTS_W void circle(InputOutputArray img, Point center, int radius, /** @brief Draws a simple or thick elliptic arc or fills an ellipse sector. -The functions ellipse with less parameters draw an ellipse outline, a filled ellipse, an elliptic -arc, or a filled ellipse sector. A piecewise-linear curve is used to approximate the elliptic arc +The function cv::ellipse with more parameters draws an ellipse outline, a filled ellipse, an elliptic +arc, or a filled ellipse sector. The drawing code uses general parametric form. +A piecewise-linear curve is used to approximate the elliptic arc boundary. If you need more control of the ellipse rendering, you can retrieve the curve using -ellipse2Poly and then render it with polylines or fill it with fillPoly . If you use the first -variant of the function and want to draw the whole ellipse, not an arc, pass startAngle=0 and -endAngle=360 . The figure below explains the meaning of the parameters. +#ellipse2Poly and then render it with #polylines or fill it with #fillPoly. If you use the first +variant of the function and want to draw the whole ellipse, not an arc, pass `startAngle=0` and +`endAngle=360`. If `startAngle` is greater than `endAngle`, they are swapped. The figure below explains +the meaning of the parameters to draw the blue arc. -![Parameters of Elliptic Arc](pics/ellipse.png) +![Parameters of Elliptic Arc](pics/ellipse.svg) @param img Image. @param center Center of the ellipse. @@ -4083,7 +4382,7 @@ endAngle=360 . The figure below explains the meaning of the parameters. @param color Ellipse color. @param thickness Thickness of the ellipse arc outline, if positive. Otherwise, this indicates that a filled ellipse sector is to be drawn. -@param lineType Type of the ellipse boundary. See the line description. +@param lineType Type of the ellipse boundary. See #LineTypes @param shift Number of fractional bits in the coordinates of the center and values of axes. */ CV_EXPORTS_W void ellipse(InputOutputArray img, Point center, Size axes, @@ -4098,7 +4397,7 @@ an ellipse inscribed in the rotated rectangle. @param color Ellipse color. @param thickness Thickness of the ellipse arc outline, if positive. Otherwise, this indicates that a filled ellipse sector is to be drawn. -@param lineType Type of the ellipse boundary. See the line description. +@param lineType Type of the ellipse boundary. See #LineTypes */ CV_EXPORTS_W void ellipse(InputOutputArray img, const RotatedRect& box, const Scalar& color, int thickness = 1, int lineType = LINE_8); @@ -4107,32 +4406,20 @@ CV_EXPORTS_W void ellipse(InputOutputArray img, const RotatedRect& box, const Sc /* ADDING A SET OF PREDEFINED MARKERS WHICH COULD BE USED TO HIGHLIGHT POSITIONS IN AN IMAGE */ /* ----------------------------------------------------------------------------------------- */ -//! Possible set of marker types used for the cv::drawMarker function -enum MarkerTypes -{ - MARKER_CROSS = 0, //!< A crosshair marker shape - MARKER_TILTED_CROSS = 1, //!< A 45 degree tilted crosshair marker shape - MARKER_STAR = 2, //!< A star marker shape, combination of cross and tilted cross - MARKER_DIAMOND = 3, //!< A diamond marker shape - MARKER_SQUARE = 4, //!< A square marker shape - MARKER_TRIANGLE_UP = 5, //!< An upwards pointing triangle marker shape - MARKER_TRIANGLE_DOWN = 6 //!< A downwards pointing triangle marker shape -}; - /** @brief Draws a marker on a predefined position in an image. -The function drawMarker draws a marker on a given position in the image. For the moment several -marker types are supported, see cv::MarkerTypes for more information. +The function cv::drawMarker draws a marker on a given position in the image. For the moment several +marker types are supported, see #MarkerTypes for more information. @param img Image. @param position The point where the crosshair is positioned. @param color Line color. -@param markerType The specific type of marker you want to use, see cv::MarkerTypes +@param markerType The specific type of marker you want to use, see #MarkerTypes @param thickness Line thickness. -@param line_type Type of the line, see cv::LineTypes +@param line_type Type of the line, See #LineTypes @param markerSize The length of the marker axis [default = 20 pixels] */ -CV_EXPORTS_W void drawMarker(CV_IN_OUT Mat& img, Point position, const Scalar& color, +CV_EXPORTS_W void drawMarker(InputOutputArray img, Point position, const Scalar& color, int markerType = MARKER_CROSS, int markerSize=20, int thickness=1, int line_type=8); @@ -4141,21 +4428,21 @@ CV_EXPORTS_W void drawMarker(CV_IN_OUT Mat& img, Point position, const Scalar& c /* ----------------------------------------------------------------------------------------- */ /** @overload */ -CV_EXPORTS void fillConvexPoly(Mat& img, const Point* pts, int npts, +CV_EXPORTS void fillConvexPoly(InputOutputArray img, const Point* pts, int npts, const Scalar& color, int lineType = LINE_8, int shift = 0); /** @brief Fills a convex polygon. -The function fillConvexPoly draws a filled convex polygon. This function is much faster than the -function cv::fillPoly . It can fill not only convex polygons but any monotonic polygon without +The function cv::fillConvexPoly draws a filled convex polygon. This function is much faster than the +function #fillPoly . It can fill not only convex polygons but any monotonic polygon without self-intersections, that is, a polygon whose contour intersects every horizontal line (scan line) twice at the most (though, its top-most and/or the bottom edge could be horizontal). @param img Image. @param points Polygon vertices. @param color Polygon color. -@param lineType Type of the polygon boundaries. See the line description. +@param lineType Type of the polygon boundaries. See #LineTypes @param shift Number of fractional bits in the vertex coordinates. */ CV_EXPORTS_W void fillConvexPoly(InputOutputArray img, InputArray points, @@ -4163,21 +4450,26 @@ CV_EXPORTS_W void fillConvexPoly(InputOutputArray img, InputArray points, int shift = 0); /** @overload */ -CV_EXPORTS void fillPoly(Mat& img, const Point** pts, +CV_EXPORTS void fillPoly(InputOutputArray img, const Point** pts, const int* npts, int ncontours, const Scalar& color, int lineType = LINE_8, int shift = 0, Point offset = Point() ); +/** @example samples/cpp/tutorial_code/ImgProc/basic_drawing/Drawing_1.cpp +An example using drawing functions +Check @ref tutorial_random_generator_and_text "the corresponding tutorial" for more details +*/ + /** @brief Fills the area bounded by one or more polygons. -The function fillPoly fills an area bounded by several polygonal contours. The function can fill +The function cv::fillPoly fills an area bounded by several polygonal contours. The function can fill complex areas, for example, areas with holes, contours with self-intersections (some of their parts), and so forth. @param img Image. @param pts Array of polygons where each polygon is represented as an array of points. @param color Polygon color. -@param lineType Type of the polygon boundaries. See the line description. +@param lineType Type of the polygon boundaries. See #LineTypes @param shift Number of fractional bits in the vertex coordinates. @param offset Optional offset of all points of the contours. */ @@ -4186,7 +4478,7 @@ CV_EXPORTS_W void fillPoly(InputOutputArray img, InputArrayOfArrays pts, Point offset = Point() ); /** @overload */ -CV_EXPORTS void polylines(Mat& img, const Point* const* pts, const int* npts, +CV_EXPORTS void polylines(InputOutputArray img, const Point* const* pts, const int* npts, int ncontours, bool isClosed, const Scalar& color, int thickness = 1, int lineType = LINE_8, int shift = 0 ); @@ -4198,77 +4490,38 @@ CV_EXPORTS void polylines(Mat& img, const Point* const* pts, const int* npts, the function draws a line from the last vertex of each curve to its first vertex. @param color Polyline color. @param thickness Thickness of the polyline edges. -@param lineType Type of the line segments. See the line description. +@param lineType Type of the line segments. See #LineTypes @param shift Number of fractional bits in the vertex coordinates. -The function polylines draws one or more polygonal curves. +The function cv::polylines draws one or more polygonal curves. */ CV_EXPORTS_W void polylines(InputOutputArray img, InputArrayOfArrays pts, bool isClosed, const Scalar& color, int thickness = 1, int lineType = LINE_8, int shift = 0 ); -/** @example contours2.cpp - An example using the drawContour functionality +/** @example samples/cpp/contours2.cpp +An example program illustrates the use of cv::findContours and cv::drawContours +\image html WindowsQtContoursOutput.png "Screenshot of the program" */ -/** @example segment_objects.cpp +/** @example samples/cpp/segment_objects.cpp An example using drawContours to clean up a background segmentation result - */ +*/ /** @brief Draws contours outlines or filled contours. The function draws contour outlines in the image if \f$\texttt{thickness} \ge 0\f$ or fills the area bounded by the contours if \f$\texttt{thickness}<0\f$ . The example below shows how to retrieve connected components from the binary image and label them: : -@code - #include "opencv2/imgproc.hpp" - #include "opencv2/highgui.hpp" - - using namespace cv; - using namespace std; - - int main( int argc, char** argv ) - { - Mat src; - // the first command-line parameter must be a filename of the binary - // (black-n-white) image - if( argc != 2 || !(src=imread(argv[1], 0)).data) - return -1; - - Mat dst = Mat::zeros(src.rows, src.cols, CV_8UC3); - - src = src > 1; - namedWindow( "Source", 1 ); - imshow( "Source", src ); - - vector > contours; - vector hierarchy; - - findContours( src, contours, hierarchy, - RETR_CCOMP, CHAIN_APPROX_SIMPLE ); - - // iterate through all the top-level contours, - // draw each connected component with its own random color - int idx = 0; - for( ; idx >= 0; idx = hierarchy[idx][0] ) - { - Scalar color( rand()&255, rand()&255, rand()&255 ); - drawContours( dst, contours, idx, color, FILLED, 8, hierarchy ); - } - - namedWindow( "Components", 1 ); - imshow( "Components", dst ); - waitKey(0); - } -@endcode +@include snippets/imgproc_drawContours.cpp @param image Destination image. @param contours All the input contours. Each contour is stored as a point vector. @param contourIdx Parameter indicating a contour to draw. If it is negative, all the contours are drawn. @param color Color of the contours. @param thickness Thickness of lines the contours are drawn with. If it is negative (for example, -thickness=CV_FILLED ), the contour interiors are drawn. -@param lineType Line connectivity. See cv::LineTypes. +thickness=#FILLED ), the contour interiors are drawn. +@param lineType Line connectivity. See #LineTypes @param hierarchy Optional information about hierarchy. It is only needed if you want to draw only some of the contours (see maxLevel ). @param maxLevel Maximal level for drawn contours. If it is 0, only the specified contour is drawn. @@ -4277,6 +4530,11 @@ draws the contours, all the nested contours, all the nested-to-nested contours, parameter is only taken into account when there is hierarchy available. @param offset Optional contour shift parameter. Shift all the drawn contours by the specified \f$\texttt{offset}=(dx,dy)\f$ . +@note When thickness=#FILLED, the function is designed to handle connected components with holes correctly +even when no hierarchy date is provided. This is done by analyzing all the outlines together +using even-odd rule. This may give incorrect results if you have a joint collection of separately retrieved +contours. In order to solve this problem, you need to call #drawContours separately for each sub-group +of contours, or iterate over the collection using contourIdx parameter. */ CV_EXPORTS_W void drawContours( InputOutputArray image, InputArrayOfArrays contours, int contourIdx, const Scalar& color, @@ -4286,15 +4544,22 @@ CV_EXPORTS_W void drawContours( InputOutputArray image, InputArrayOfArrays conto /** @brief Clips the line against the image rectangle. -The functions clipLine calculate a part of the line segment that is entirely within the specified -rectangle. They return false if the line segment is completely outside the rectangle. Otherwise, -they return true . +The function cv::clipLine calculates a part of the line segment that is entirely within the specified +rectangle. it returns false if the line segment is completely outside the rectangle. Otherwise, +it returns true . @param imgSize Image size. The image rectangle is Rect(0, 0, imgSize.width, imgSize.height) . @param pt1 First line point. @param pt2 Second line point. */ CV_EXPORTS bool clipLine(Size imgSize, CV_IN_OUT Point& pt1, CV_IN_OUT Point& pt2); +/** @overload +@param imgSize Image size. The image rectangle is Rect(0, 0, imgSize.width, imgSize.height) . +@param pt1 First line point. +@param pt2 Second line point. +*/ +CV_EXPORTS bool clipLine(Size2l imgSize, CV_IN_OUT Point2l& pt1, CV_IN_OUT Point2l& pt2); + /** @overload @param imgRect Image rectangle. @param pt1 First line point. @@ -4305,11 +4570,11 @@ CV_EXPORTS_W bool clipLine(Rect imgRect, CV_OUT CV_IN_OUT Point& pt1, CV_OUT CV_ /** @brief Approximates an elliptic arc with a polyline. The function ellipse2Poly computes the vertices of a polyline that approximates the specified -elliptic arc. It is used by cv::ellipse. +elliptic arc. It is used by #ellipse. If `arcStart` is greater than `arcEnd`, they are swapped. @param center Center of the arc. -@param axes Half of the size of the ellipse main axes. See the ellipse for details. -@param angle Rotation angle of the ellipse in degrees. See the ellipse for details. +@param axes Half of the size of the ellipse main axes. See #ellipse for details. +@param angle Rotation angle of the ellipse in degrees. See #ellipse for details. @param arcStart Starting angle of the elliptic arc in degrees. @param arcEnd Ending angle of the elliptic arc in degrees. @param delta Angle between the subsequent polyline vertices. It defines the approximation @@ -4320,20 +4585,33 @@ CV_EXPORTS_W void ellipse2Poly( Point center, Size axes, int angle, int arcStart, int arcEnd, int delta, CV_OUT std::vector& pts ); +/** @overload +@param center Center of the arc. +@param axes Half of the size of the ellipse main axes. See #ellipse for details. +@param angle Rotation angle of the ellipse in degrees. See #ellipse for details. +@param arcStart Starting angle of the elliptic arc in degrees. +@param arcEnd Ending angle of the elliptic arc in degrees. +@param delta Angle between the subsequent polyline vertices. It defines the approximation accuracy. +@param pts Output vector of polyline vertices. +*/ +CV_EXPORTS void ellipse2Poly(Point2d center, Size2d axes, int angle, + int arcStart, int arcEnd, int delta, + CV_OUT std::vector& pts); + /** @brief Draws a text string. -The function putText renders the specified text string in the image. Symbols that cannot be rendered -using the specified font are replaced by question marks. See getTextSize for a text rendering code +The function cv::putText renders the specified text string in the image. Symbols that cannot be rendered +using the specified font are replaced by question marks. See #getTextSize for a text rendering code example. @param img Image. @param text Text string to be drawn. @param org Bottom-left corner of the text string in the image. -@param fontFace Font type, see cv::HersheyFonts. +@param fontFace Font type, see #HersheyFonts. @param fontScale Font scale factor that is multiplied by the font-specific base size. @param color Text color. @param thickness Thickness of the lines used to draw a text. -@param lineType Line type. See the line for details. +@param lineType Line type. See #LineTypes @param bottomLeftOrigin When true, the image data origin is at the bottom-left corner. Otherwise, it is at the top-left corner. */ @@ -4344,7 +4622,7 @@ CV_EXPORTS_W void putText( InputOutputArray img, const String& text, Point org, /** @brief Calculates the width and height of a text string. -The function getTextSize calculates and returns the size of a box that contains the specified text. +The function cv::getTextSize calculates and returns the size of a box that contains the specified text. That is, the following code renders some text, the tight box surrounding it, and the baseline: : @code String text = "Funny text inside the box"; @@ -4378,19 +4656,33 @@ That is, the following code renders some text, the tight box surrounding it, and @endcode @param text Input text string. -@param fontFace Font to use, see cv::HersheyFonts. +@param fontFace Font to use, see #HersheyFonts. @param fontScale Font scale factor that is multiplied by the font-specific base size. -@param thickness Thickness of lines used to render the text. See putText for details. +@param thickness Thickness of lines used to render the text. See #putText for details. @param[out] baseLine y-coordinate of the baseline relative to the bottom-most text point. @return The size of a box that contains the specified text. -@see cv::putText +@see putText */ CV_EXPORTS_W Size getTextSize(const String& text, int fontFace, double fontScale, int thickness, CV_OUT int* baseLine); + +/** @brief Calculates the font-specific size to use to achieve a given height in pixels. + +@param fontFace Font to use, see cv::HersheyFonts. +@param pixelHeight Pixel height to compute the fontScale for +@param thickness Thickness of lines used to render the text.See putText for details. +@return The fontSize to use for cv::putText + +@see cv::putText +*/ +CV_EXPORTS_W double getFontScaleFromHeight(const int fontFace, + const int pixelHeight, + const int thickness = 1); + /** @brief Line iterator The class is used to iterate over all the pixels on the raster line @@ -4413,7 +4705,7 @@ LineIterator it2 = it; vector buf(it.count); for(int i = 0; i < it.count; i++, ++it) - buf[i] = *(const Vec3b)*it; + buf[i] = *(const Vec3b*)*it; // alternative way of iterating through the line for(int i = 0; i < it2.count; i++, ++it2) @@ -4426,17 +4718,46 @@ for(int i = 0; i < it2.count; i++, ++it2) class CV_EXPORTS LineIterator { public: - /** @brief intializes the iterator + /** @brief initializes the iterator creates iterators for the line connecting pt1 and pt2 the line will be clipped on the image boundaries the line is 8-connected or 4-connected If leftToRight=true, then the iteration is always done from the left-most point to the right most, - not to depend on the ordering of pt1 and pt2 parameters + not to depend on the ordering of pt1 and pt2 parameters; */ LineIterator( const Mat& img, Point pt1, Point pt2, - int connectivity = 8, bool leftToRight = false ); + int connectivity = 8, bool leftToRight = false ) + { + init(&img, Rect(0, 0, img.cols, img.rows), pt1, pt2, connectivity, leftToRight); + ptmode = false; + } + LineIterator( Point pt1, Point pt2, + int connectivity = 8, bool leftToRight = false ) + { + init(0, Rect(std::min(pt1.x, pt2.x), + std::min(pt1.y, pt2.y), + std::max(pt1.x, pt2.x) - std::min(pt1.x, pt2.x) + 1, + std::max(pt1.y, pt2.y) - std::min(pt1.y, pt2.y) + 1), + pt1, pt2, connectivity, leftToRight); + ptmode = true; + } + LineIterator( Size boundingAreaSize, Point pt1, Point pt2, + int connectivity = 8, bool leftToRight = false ) + { + init(0, Rect(0, 0, boundingAreaSize.width, boundingAreaSize.height), + pt1, pt2, connectivity, leftToRight); + ptmode = true; + } + LineIterator( Rect boundingAreaRect, Point pt1, Point pt2, + int connectivity = 8, bool leftToRight = false ) + { + init(0, boundingAreaRect, pt1, pt2, connectivity, leftToRight); + ptmode = true; + } + void init(const Mat* img, Rect boundingAreaRect, Point pt1, Point pt2, int connectivity, bool leftToRight); + /** @brief returns pointer to the current pixel */ uchar* operator *(); @@ -4456,6 +4777,9 @@ class CV_EXPORTS LineIterator int err, count; int minusDelta, plusDelta; int minusStep, plusStep; + int minusShift, plusShift; + Point p; + bool ptmode; }; //! @cond IGNORED @@ -4465,7 +4789,7 @@ class CV_EXPORTS LineIterator inline uchar* LineIterator::operator *() { - return ptr; + return ptmode ? 0 : ptr; } inline @@ -4473,7 +4797,15 @@ LineIterator& LineIterator::operator ++() { int mask = err < 0 ? -1 : 0; err += minusDelta + (plusDelta & mask); - ptr += minusStep + (plusStep & mask); + if(!ptmode) + { + ptr += minusStep + (plusStep & mask); + } + else + { + p.x += minusShift + (plusShift & mask); + p.y += minusStep + (plusStep & mask); + } return *this; } @@ -4488,9 +4820,13 @@ LineIterator LineIterator::operator ++(int) inline Point LineIterator::pos() const { - Point p; - p.y = (int)((ptr - ptr0)/step); - p.x = (int)(((ptr - ptr0) - p.y*step)/elemSize); + if(!ptmode) + { + size_t offset = (size_t)(ptr - ptr0); + int y = (int)(offset/step); + int x = (int)((offset - (size_t)y*step)/elemSize); + return Point(x, y); + } return p; } @@ -4502,8 +4838,4 @@ Point LineIterator::pos() const } // cv -#ifndef DISABLE_OPENCV_24_COMPATIBILITY -#include "opencv2/imgproc/imgproc_c.h" -#endif - #endif diff --git a/IPL/include/opencv/opencv2/imgproc/detail/distortion_model.hpp b/IPL/include/opencv/opencv2/imgproc/detail/distortion_model.hpp deleted file mode 100644 index ca29304..0000000 --- a/IPL/include/opencv/opencv2/imgproc/detail/distortion_model.hpp +++ /dev/null @@ -1,123 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_IMGPROC_DETAIL_DISTORTION_MODEL_HPP__ -#define __OPENCV_IMGPROC_DETAIL_DISTORTION_MODEL_HPP__ - -//! @cond IGNORED - -namespace cv { namespace detail { -/** -Computes the matrix for the projection onto a tilted image sensor -\param tauX angular parameter rotation around x-axis -\param tauY angular parameter rotation around y-axis -\param matTilt if not NULL returns the matrix -\f[ -\vecthreethree{R_{33}(\tau_x, \tau_y)}{0}{-R_{13}((\tau_x, \tau_y)} -{0}{R_{33}(\tau_x, \tau_y)}{-R_{23}(\tau_x, \tau_y)} -{0}{0}{1} R(\tau_x, \tau_y) -\f] -where -\f[ -R(\tau_x, \tau_y) = -\vecthreethree{\cos(\tau_y)}{0}{-\sin(\tau_y)}{0}{1}{0}{\sin(\tau_y)}{0}{\cos(\tau_y)} -\vecthreethree{1}{0}{0}{0}{\cos(\tau_x)}{\sin(\tau_x)}{0}{-\sin(\tau_x)}{\cos(\tau_x)} = -\vecthreethree{\cos(\tau_y)}{\sin(\tau_y)\sin(\tau_x)}{-\sin(\tau_y)\cos(\tau_x)} -{0}{\cos(\tau_x)}{\sin(\tau_x)} -{\sin(\tau_y)}{-\cos(\tau_y)\sin(\tau_x)}{\cos(\tau_y)\cos(\tau_x)}. -\f] -\param dMatTiltdTauX if not NULL it returns the derivative of matTilt with -respect to \f$\tau_x\f$. -\param dMatTiltdTauY if not NULL it returns the derivative of matTilt with -respect to \f$\tau_y\f$. -\param invMatTilt if not NULL it returns the inverse of matTilt -**/ -template -void computeTiltProjectionMatrix(FLOAT tauX, - FLOAT tauY, - Matx* matTilt = 0, - Matx* dMatTiltdTauX = 0, - Matx* dMatTiltdTauY = 0, - Matx* invMatTilt = 0) -{ - FLOAT cTauX = cos(tauX); - FLOAT sTauX = sin(tauX); - FLOAT cTauY = cos(tauY); - FLOAT sTauY = sin(tauY); - Matx matRotX = Matx(1,0,0,0,cTauX,sTauX,0,-sTauX,cTauX); - Matx matRotY = Matx(cTauY,0,-sTauY,0,1,0,sTauY,0,cTauY); - Matx matRotXY = matRotY * matRotX; - Matx matProjZ = Matx(matRotXY(2,2),0,-matRotXY(0,2),0,matRotXY(2,2),-matRotXY(1,2),0,0,1); - if (matTilt) - { - // Matrix for trapezoidal distortion of tilted image sensor - *matTilt = matProjZ * matRotXY; - } - if (dMatTiltdTauX) - { - // Derivative with respect to tauX - Matx dMatRotXYdTauX = matRotY * Matx(0,0,0,0,-sTauX,cTauX,0,-cTauX,-sTauX); - Matx dMatProjZdTauX = Matx(dMatRotXYdTauX(2,2),0,-dMatRotXYdTauX(0,2), - 0,dMatRotXYdTauX(2,2),-dMatRotXYdTauX(1,2),0,0,0); - *dMatTiltdTauX = (matProjZ * dMatRotXYdTauX) + (dMatProjZdTauX * matRotXY); - } - if (dMatTiltdTauY) - { - // Derivative with respect to tauY - Matx dMatRotXYdTauY = Matx(-sTauY,0,-cTauY,0,0,0,cTauY,0,-sTauY) * matRotX; - Matx dMatProjZdTauY = Matx(dMatRotXYdTauY(2,2),0,-dMatRotXYdTauY(0,2), - 0,dMatRotXYdTauY(2,2),-dMatRotXYdTauY(1,2),0,0,0); - *dMatTiltdTauY = (matProjZ * dMatRotXYdTauY) + (dMatProjZdTauY * matRotXY); - } - if (invMatTilt) - { - FLOAT inv = 1./matRotXY(2,2); - Matx invMatProjZ = Matx(inv,0,inv*matRotXY(0,2),0,inv,inv*matRotXY(1,2),0,0,1); - *invMatTilt = matRotXY.t()*invMatProjZ; - } -} -}} // namespace detail, cv - - -//! @endcond - -#endif // __OPENCV_IMGPROC_DETAIL_DISTORTION_MODEL_HPP__ diff --git a/IPL/include/opencv/opencv2/imgproc/detail/gcgraph.hpp b/IPL/include/opencv/opencv2/imgproc/detail/gcgraph.hpp new file mode 100644 index 0000000..db2ea0f --- /dev/null +++ b/IPL/include/opencv/opencv2/imgproc/detail/gcgraph.hpp @@ -0,0 +1,393 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_IMGPROC_DETAIL_GCGRAPH_HPP +#define OPENCV_IMGPROC_DETAIL_GCGRAPH_HPP + +//! @cond IGNORED + +namespace cv { namespace detail { +template class GCGraph +{ +public: + GCGraph(); + GCGraph( unsigned int vtxCount, unsigned int edgeCount ); + ~GCGraph(); + void create( unsigned int vtxCount, unsigned int edgeCount ); + int addVtx(); + void addEdges( int i, int j, TWeight w, TWeight revw ); + void addTermWeights( int i, TWeight sourceW, TWeight sinkW ); + TWeight maxFlow(); + bool inSourceSegment( int i ); +private: + class Vtx + { + public: + Vtx *next; // initialized and used in maxFlow() only + int parent; + int first; + int ts; + int dist; + TWeight weight; + uchar t; + }; + class Edge + { + public: + int dst; + int next; + TWeight weight; + }; + + std::vector vtcs; + std::vector edges; + TWeight flow; +}; + +template +GCGraph::GCGraph() +{ + flow = 0; +} +template +GCGraph::GCGraph( unsigned int vtxCount, unsigned int edgeCount ) +{ + create( vtxCount, edgeCount ); +} +template +GCGraph::~GCGraph() +{ +} +template +void GCGraph::create( unsigned int vtxCount, unsigned int edgeCount ) +{ + vtcs.reserve( vtxCount ); + edges.reserve( edgeCount + 2 ); + flow = 0; +} + +template +int GCGraph::addVtx() +{ + Vtx v; + memset( &v, 0, sizeof(Vtx)); + vtcs.push_back(v); + return (int)vtcs.size() - 1; +} + +template +void GCGraph::addEdges( int i, int j, TWeight w, TWeight revw ) +{ + CV_Assert( i>=0 && i<(int)vtcs.size() ); + CV_Assert( j>=0 && j<(int)vtcs.size() ); + CV_Assert( w>=0 && revw>=0 ); + CV_Assert( i != j ); + + if( !edges.size() ) + edges.resize( 2 ); + + Edge fromI, toI; + fromI.dst = j; + fromI.next = vtcs[i].first; + fromI.weight = w; + vtcs[i].first = (int)edges.size(); + edges.push_back( fromI ); + + toI.dst = i; + toI.next = vtcs[j].first; + toI.weight = revw; + vtcs[j].first = (int)edges.size(); + edges.push_back( toI ); +} + +template +void GCGraph::addTermWeights( int i, TWeight sourceW, TWeight sinkW ) +{ + CV_Assert( i>=0 && i<(int)vtcs.size() ); + + TWeight dw = vtcs[i].weight; + if( dw > 0 ) + sourceW += dw; + else + sinkW -= dw; + flow += (sourceW < sinkW) ? sourceW : sinkW; + vtcs[i].weight = sourceW - sinkW; +} + +template +TWeight GCGraph::maxFlow() +{ + const int TERMINAL = -1, ORPHAN = -2; + Vtx stub, *nilNode = &stub, *first = nilNode, *last = nilNode; + int curr_ts = 0; + stub.next = nilNode; + Vtx *vtxPtr = &vtcs[0]; + Edge *edgePtr = &edges[0]; + + std::vector orphans; + + // initialize the active queue and the graph vertices + for( int i = 0; i < (int)vtcs.size(); i++ ) + { + Vtx* v = vtxPtr + i; + v->ts = 0; + if( v->weight != 0 ) + { + last = last->next = v; + v->dist = 1; + v->parent = TERMINAL; + v->t = v->weight < 0; + } + else + v->parent = 0; + } + first = first->next; + last->next = nilNode; + nilNode->next = 0; + + // run the search-path -> augment-graph -> restore-trees loop + for(;;) + { + Vtx* v, *u; + int e0 = -1, ei = 0, ej = 0; + TWeight minWeight, weight; + uchar vt; + + // grow S & T search trees, find an edge connecting them + while( first != nilNode ) + { + v = first; + if( v->parent ) + { + vt = v->t; + for( ei = v->first; ei != 0; ei = edgePtr[ei].next ) + { + if( edgePtr[ei^vt].weight == 0 ) + continue; + u = vtxPtr+edgePtr[ei].dst; + if( !u->parent ) + { + u->t = vt; + u->parent = ei ^ 1; + u->ts = v->ts; + u->dist = v->dist + 1; + if( !u->next ) + { + u->next = nilNode; + last = last->next = u; + } + continue; + } + + if( u->t != vt ) + { + e0 = ei ^ vt; + break; + } + + if( u->dist > v->dist+1 && u->ts <= v->ts ) + { + // reassign the parent + u->parent = ei ^ 1; + u->ts = v->ts; + u->dist = v->dist + 1; + } + } + if( e0 > 0 ) + break; + } + // exclude the vertex from the active list + first = first->next; + v->next = 0; + } + + if( e0 <= 0 ) + break; + + // find the minimum edge weight along the path + minWeight = edgePtr[e0].weight; + CV_Assert( minWeight > 0 ); + // k = 1: source tree, k = 0: destination tree + for( int k = 1; k >= 0; k-- ) + { + for( v = vtxPtr+edgePtr[e0^k].dst;; v = vtxPtr+edgePtr[ei].dst ) + { + if( (ei = v->parent) < 0 ) + break; + weight = edgePtr[ei^k].weight; + minWeight = MIN(minWeight, weight); + CV_Assert( minWeight > 0 ); + } + weight = fabs(v->weight); + minWeight = MIN(minWeight, weight); + CV_Assert( minWeight > 0 ); + } + + // modify weights of the edges along the path and collect orphans + edgePtr[e0].weight -= minWeight; + edgePtr[e0^1].weight += minWeight; + flow += minWeight; + + // k = 1: source tree, k = 0: destination tree + for( int k = 1; k >= 0; k-- ) + { + for( v = vtxPtr+edgePtr[e0^k].dst;; v = vtxPtr+edgePtr[ei].dst ) + { + if( (ei = v->parent) < 0 ) + break; + edgePtr[ei^(k^1)].weight += minWeight; + if( (edgePtr[ei^k].weight -= minWeight) == 0 ) + { + orphans.push_back(v); + v->parent = ORPHAN; + } + } + + v->weight = v->weight + minWeight*(1-k*2); + if( v->weight == 0 ) + { + orphans.push_back(v); + v->parent = ORPHAN; + } + } + + // restore the search trees by finding new parents for the orphans + curr_ts++; + while( !orphans.empty() ) + { + Vtx* v2 = orphans.back(); + orphans.pop_back(); + + int d, minDist = INT_MAX; + e0 = 0; + vt = v2->t; + + for( ei = v2->first; ei != 0; ei = edgePtr[ei].next ) + { + if( edgePtr[ei^(vt^1)].weight == 0 ) + continue; + u = vtxPtr+edgePtr[ei].dst; + if( u->t != vt || u->parent == 0 ) + continue; + // compute the distance to the tree root + for( d = 0;; ) + { + if( u->ts == curr_ts ) + { + d += u->dist; + break; + } + ej = u->parent; + d++; + if( ej < 0 ) + { + if( ej == ORPHAN ) + d = INT_MAX-1; + else + { + u->ts = curr_ts; + u->dist = 1; + } + break; + } + u = vtxPtr+edgePtr[ej].dst; + } + + // update the distance + if( ++d < INT_MAX ) + { + if( d < minDist ) + { + minDist = d; + e0 = ei; + } + for( u = vtxPtr+edgePtr[ei].dst; u->ts != curr_ts; u = vtxPtr+edgePtr[u->parent].dst ) + { + u->ts = curr_ts; + u->dist = --d; + } + } + } + + if( (v2->parent = e0) > 0 ) + { + v2->ts = curr_ts; + v2->dist = minDist; + continue; + } + + /* no parent is found */ + v2->ts = 0; + for( ei = v2->first; ei != 0; ei = edgePtr[ei].next ) + { + u = vtxPtr+edgePtr[ei].dst; + ej = u->parent; + if( u->t != vt || !ej ) + continue; + if( edgePtr[ei^(vt^1)].weight && !u->next ) + { + u->next = nilNode; + last = last->next = u; + } + if( ej > 0 && vtxPtr+edgePtr[ej].dst == v2 ) + { + orphans.push_back(u); + u->parent = ORPHAN; + } + } + } + } + return flow; +} + +template +bool GCGraph::inSourceSegment( int i ) +{ + CV_Assert( i>=0 && i<(int)vtcs.size() ); + return vtcs[i].t == 0; +} + +}} // namespace detail, cv + + +//! @endcond + +#endif // OPENCV_IMGPROC_DETAIL_GCGRAPH_HPP diff --git a/IPL/include/opencv/opencv2/imgproc/hal/hal.hpp b/IPL/include/opencv/opencv2/imgproc/hal/hal.hpp new file mode 100644 index 0000000..ac20725 --- /dev/null +++ b/IPL/include/opencv/opencv2/imgproc/hal/hal.hpp @@ -0,0 +1,241 @@ +#ifndef CV_IMGPROC_HAL_HPP +#define CV_IMGPROC_HAL_HPP + +#include "opencv2/core/cvdef.h" +#include "opencv2/core/cvstd.hpp" +#include "opencv2/core/hal/interface.h" + +namespace cv { namespace hal { + +//! @addtogroup imgproc_hal_functions +//! @{ + +//--------------------------- +//! @cond IGNORED + +struct CV_EXPORTS Filter2D +{ + CV_DEPRECATED static Ptr create(uchar * , size_t , int , + int , int , + int , int , + int , int , + int , double , + int , int , + bool , bool ); + virtual void apply(uchar * , size_t , + uchar * , size_t , + int , int , + int , int , + int , int ) = 0; + virtual ~Filter2D() {} +}; + +struct CV_EXPORTS SepFilter2D +{ + CV_DEPRECATED static Ptr create(int , int , int , + uchar * , int , + uchar * , int , + int , int , + double , int ); + virtual void apply(uchar * , size_t , + uchar * , size_t , + int , int , + int , int , + int , int ) = 0; + virtual ~SepFilter2D() {} +}; + + +struct CV_EXPORTS Morph +{ + CV_DEPRECATED static Ptr create(int , int , int , int , int , + int , uchar * , size_t , + int , int , + int , int , + int , const double *, + int , bool , bool ); + virtual void apply(uchar * , size_t , uchar * , size_t , int , int , + int , int , int , int , + int , int , int , int ) = 0; + virtual ~Morph() {} +}; + +//! @endcond +//--------------------------- + +CV_EXPORTS void filter2D(int stype, int dtype, int kernel_type, + uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int full_width, int full_height, + int offset_x, int offset_y, + uchar * kernel_data, size_t kernel_step, + int kernel_width, int kernel_height, + int anchor_x, int anchor_y, + double delta, int borderType, + bool isSubmatrix); + +CV_EXPORTS void sepFilter2D(int stype, int dtype, int ktype, + uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int full_width, int full_height, + int offset_x, int offset_y, + uchar * kernelx_data, int kernelx_len, + uchar * kernely_data, int kernely_len, + int anchor_x, int anchor_y, + double delta, int borderType); + +CV_EXPORTS void morph(int op, int src_type, int dst_type, + uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int roi_width, int roi_height, int roi_x, int roi_y, + int roi_width2, int roi_height2, int roi_x2, int roi_y2, + int kernel_type, uchar * kernel_data, size_t kernel_step, + int kernel_width, int kernel_height, int anchor_x, int anchor_y, + int borderType, const double borderValue[4], + int iterations, bool isSubmatrix); + + +CV_EXPORTS void resize(int src_type, + const uchar * src_data, size_t src_step, int src_width, int src_height, + uchar * dst_data, size_t dst_step, int dst_width, int dst_height, + double inv_scale_x, double inv_scale_y, int interpolation); + +CV_EXPORTS void warpAffine(int src_type, + const uchar * src_data, size_t src_step, int src_width, int src_height, + uchar * dst_data, size_t dst_step, int dst_width, int dst_height, + const double M[6], int interpolation, int borderType, const double borderValue[4]); + +CV_EXPORTS void warpPerspective(int src_type, + const uchar * src_data, size_t src_step, int src_width, int src_height, + uchar * dst_data, size_t dst_step, int dst_width, int dst_height, + const double M[9], int interpolation, int borderType, const double borderValue[4]); + +CV_EXPORTS void cvtBGRtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int scn, int dcn, bool swapBlue); + +CV_EXPORTS void cvtBGRtoBGR5x5(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int scn, bool swapBlue, int greenBits); + +CV_EXPORTS void cvtBGR5x5toBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int dcn, bool swapBlue, int greenBits); + +CV_EXPORTS void cvtBGRtoGray(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int scn, bool swapBlue); + +CV_EXPORTS void cvtGraytoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int dcn); + +CV_EXPORTS void cvtBGR5x5toGray(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int greenBits); + +CV_EXPORTS void cvtGraytoBGR5x5(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int greenBits); +CV_EXPORTS void cvtBGRtoYUV(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int scn, bool swapBlue, bool isCbCr); + +CV_EXPORTS void cvtYUVtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int dcn, bool swapBlue, bool isCbCr); + +CV_EXPORTS void cvtBGRtoXYZ(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int scn, bool swapBlue); + +CV_EXPORTS void cvtXYZtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int dcn, bool swapBlue); + +CV_EXPORTS void cvtBGRtoHSV(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int scn, bool swapBlue, bool isFullRange, bool isHSV); + +CV_EXPORTS void cvtHSVtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int dcn, bool swapBlue, bool isFullRange, bool isHSV); + +CV_EXPORTS void cvtBGRtoLab(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int scn, bool swapBlue, bool isLab, bool srgb); + +CV_EXPORTS void cvtLabtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int dcn, bool swapBlue, bool isLab, bool srgb); + +CV_EXPORTS void cvtTwoPlaneYUVtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int dst_width, int dst_height, + int dcn, bool swapBlue, int uIdx); + +//! Separate Y and UV planes +CV_EXPORTS void cvtTwoPlaneYUVtoBGR(const uchar * y_data, const uchar * uv_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int dst_width, int dst_height, + int dcn, bool swapBlue, int uIdx); + +CV_EXPORTS void cvtThreePlaneYUVtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int dst_width, int dst_height, + int dcn, bool swapBlue, int uIdx); + +CV_EXPORTS void cvtBGRtoThreePlaneYUV(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int scn, bool swapBlue, int uIdx); + +//! Separate Y and UV planes +CV_EXPORTS void cvtBGRtoTwoPlaneYUV(const uchar * src_data, size_t src_step, + uchar * y_data, uchar * uv_data, size_t dst_step, + int width, int height, + int scn, bool swapBlue, int uIdx); + +CV_EXPORTS void cvtOnePlaneYUVtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int dcn, bool swapBlue, int uIdx, int ycn); + +CV_EXPORTS void cvtRGBAtoMultipliedRGBA(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height); + +CV_EXPORTS void cvtMultipliedRGBAtoRGBA(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height); + +CV_EXPORTS void integral(int depth, int sdepth, int sqdepth, + const uchar* src, size_t srcstep, + uchar* sum, size_t sumstep, + uchar* sqsum, size_t sqsumstep, + uchar* tilted, size_t tstep, + int width, int height, int cn); + +//! @} + +}} + +#endif // CV_IMGPROC_HAL_HPP diff --git a/IPL/include/opencv/opencv2/imgproc/hal/interface.h b/IPL/include/opencv/opencv2/imgproc/hal/interface.h new file mode 100644 index 0000000..f8dbcfe --- /dev/null +++ b/IPL/include/opencv/opencv2/imgproc/hal/interface.h @@ -0,0 +1,46 @@ +#ifndef OPENCV_IMGPROC_HAL_INTERFACE_H +#define OPENCV_IMGPROC_HAL_INTERFACE_H + +//! @addtogroup imgproc_hal_interface +//! @{ + +//! @name Interpolation modes +//! @sa cv::InterpolationFlags +//! @{ +#define CV_HAL_INTER_NEAREST 0 +#define CV_HAL_INTER_LINEAR 1 +#define CV_HAL_INTER_CUBIC 2 +#define CV_HAL_INTER_AREA 3 +#define CV_HAL_INTER_LANCZOS4 4 +//! @} + +//! @name Morphology operations +//! @sa cv::MorphTypes +//! @{ +#define CV_HAL_MORPH_ERODE 0 +#define CV_HAL_MORPH_DILATE 1 +//! @} + +//! @name Threshold types +//! @sa cv::ThresholdTypes +//! @{ +#define CV_HAL_THRESH_BINARY 0 +#define CV_HAL_THRESH_BINARY_INV 1 +#define CV_HAL_THRESH_TRUNC 2 +#define CV_HAL_THRESH_TOZERO 3 +#define CV_HAL_THRESH_TOZERO_INV 4 +#define CV_HAL_THRESH_MASK 7 +#define CV_HAL_THRESH_OTSU 8 +#define CV_HAL_THRESH_TRIANGLE 16 +//! @} + +//! @name Adaptive threshold algorithm +//! @sa cv::AdaptiveThresholdTypes +//! @{ +#define CV_HAL_ADAPTIVE_THRESH_MEAN_C 0 +#define CV_HAL_ADAPTIVE_THRESH_GAUSSIAN_C 1 +//! @} + +//! @} + +#endif diff --git a/IPL/include/opencv/opencv2/imgproc/imgproc_c.h b/IPL/include/opencv/opencv2/imgproc/imgproc_c.h index 87518d7..13d7cbb 100644 --- a/IPL/include/opencv/opencv2/imgproc/imgproc_c.h +++ b/IPL/include/opencv/opencv2/imgproc/imgproc_c.h @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_IMGPROC_IMGPROC_C_H__ -#define __OPENCV_IMGPROC_IMGPROC_C_H__ +#ifndef OPENCV_IMGPROC_IMGPROC_C_H +#define OPENCV_IMGPROC_IMGPROC_C_H #include "opencv2/imgproc/types_c.h" @@ -174,7 +174,7 @@ CVAPI(void) cvReleasePyramid( CvMat*** pyramid, int extra_layers ); */ CVAPI(void) cvPyrMeanShiftFiltering( const CvArr* src, CvArr* dst, double sp, double sr, int max_level CV_DEFAULT(1), - CvTermCriteria termcrit CV_DEFAULT(cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS,5,1))); + CvTermCriteria termcrit CV_DEFAULT(cvTermCriteria(cv::TermCriteria::MAX_ITER+cv::TermCriteria::EPS,5,1))); /** @brief Segments image using seed "markers" @see cv::watershed @@ -260,52 +260,19 @@ CVAPI(void) cvConvertMaps( const CvArr* mapx, const CvArr* mapy, CvArr* mapxy, CvArr* mapalpha ); /** @brief Performs forward or inverse log-polar image transform -@see cv::logPolar +@see cv::warpPolar */ CVAPI(void) cvLogPolar( const CvArr* src, CvArr* dst, CvPoint2D32f center, double M, int flags CV_DEFAULT(CV_INTER_LINEAR+CV_WARP_FILL_OUTLIERS)); /** Performs forward or inverse linear-polar image transform -@see cv::linearPolar +@see cv::warpPolar */ CVAPI(void) cvLinearPolar( const CvArr* src, CvArr* dst, CvPoint2D32f center, double maxRadius, int flags CV_DEFAULT(CV_INTER_LINEAR+CV_WARP_FILL_OUTLIERS)); -/** @brief Transforms the input image to compensate lens distortion -@see cv::undistort -*/ -CVAPI(void) cvUndistort2( const CvArr* src, CvArr* dst, - const CvMat* camera_matrix, - const CvMat* distortion_coeffs, - const CvMat* new_camera_matrix CV_DEFAULT(0) ); - -/** @brief Computes transformation map from intrinsic camera parameters - that can used by cvRemap -*/ -CVAPI(void) cvInitUndistortMap( const CvMat* camera_matrix, - const CvMat* distortion_coeffs, - CvArr* mapx, CvArr* mapy ); - -/** @brief Computes undistortion+rectification map for a head of stereo camera -@see cv::initUndistortRectifyMap -*/ -CVAPI(void) cvInitUndistortRectifyMap( const CvMat* camera_matrix, - const CvMat* dist_coeffs, - const CvMat *R, const CvMat* new_camera_matrix, - CvArr* mapx, CvArr* mapy ); - -/** @brief Computes the original (undistorted) feature coordinates - from the observed (distorted) coordinates -@see cv::undistortPoints -*/ -CVAPI(void) cvUndistortPoints( const CvMat* src, CvMat* dst, - const CvMat* camera_matrix, - const CvMat* dist_coeffs, - const CvMat* R CV_DEFAULT(0), - const CvMat* P CV_DEFAULT(0)); - /** @brief Returns a structuring element of the specified size and shape for morphological operations. @note the created structuring element IplConvKernel\* element must be released in the end using @@ -982,10 +949,9 @@ CVAPI(void) cvFitLine( const CvArr* points, int dist_type, double param, * If a drawn figure is partially or completely outside of the image, it is clipped.* \****************************************************************************************/ -#define CV_RGB( r, g, b ) cvScalar( (b), (g), (r), 0 ) #define CV_FILLED -1 -#define CV_AA 16 +#define cv::LINE_AA 16 /** @brief Draws 4-connected, 8-connected or antialiased line segment connecting two points @see cv::line @@ -1037,9 +1003,10 @@ CV_INLINE void cvEllipseBox( CvArr* img, CvBox2D box, CvScalar color, int thickness CV_DEFAULT(1), int line_type CV_DEFAULT(8), int shift CV_DEFAULT(0) ) { - CvSize axes; - axes.width = cvRound(box.size.width*0.5); - axes.height = cvRound(box.size.height*0.5); + CvSize axes = cvSize( + cvRound(box.size.width*0.5), + cvRound(box.size.height*0.5) + ); cvEllipse( img, cvPointFrom32f( box.center ), axes, box.angle, 0, 360, color, thickness, line_type, shift ); @@ -1157,7 +1124,7 @@ CVAPI(void) cvInitFont( CvFont* font, int font_face, CV_INLINE CvFont cvFont( double scale, int thickness CV_DEFAULT(1) ) { CvFont font; - cvInitFont( &font, CV_FONT_HERSHEY_PLAIN, scale, scale, 0, thickness, CV_AA ); + cvInitFont( &font, CV_FONT_HERSHEY_PLAIN, scale, scale, 0, thickness, cv::LINE_AA ); return font; } @@ -1184,7 +1151,7 @@ CVAPI(CvScalar) cvColorToScalar( double packed_color, int arrtype ); /** @brief Returns the polygon points which make up the given ellipse. The ellipse is define by the box of size 'axes' rotated 'angle' around the 'center'. A partial -sweep of the ellipse arc can be done by spcifying arc_start and arc_end to be something other than +sweep of the ellipse arc can be done by specifying arc_start and arc_end to be something other than 0 and 360, respectively. The input array 'pts' must be large enough to hold the result. The total number of points stored into 'pts' is returned by this function. @see cv::ellipse2Poly diff --git a/IPL/include/opencv/opencv2/imgproc/types_c.h b/IPL/include/opencv/opencv2/imgproc/types_c.h index 5ecb460..d3e55f5 100644 --- a/IPL/include/opencv/opencv2/imgproc/types_c.h +++ b/IPL/include/opencv/opencv2/imgproc/types_c.h @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_IMGPROC_TYPES_C_H__ -#define __OPENCV_IMGPROC_TYPES_C_H__ +#ifndef OPENCV_IMGPROC_TYPES_C_H +#define OPENCV_IMGPROC_TYPES_C_H #include "opencv2/core/core_c.h" @@ -349,7 +349,17 @@ enum CV_BayerRG2RGB_EA = CV_BayerBG2BGR_EA, CV_BayerGR2RGB_EA = CV_BayerGB2BGR_EA, - CV_COLORCVT_MAX = 139 + CV_BayerBG2BGRA =139, + CV_BayerGB2BGRA =140, + CV_BayerRG2BGRA =141, + CV_BayerGR2BGRA =142, + + CV_BayerBG2RGBA =CV_BayerRG2BGRA, + CV_BayerGB2RGBA =CV_BayerGR2BGRA, + CV_BayerRG2RGBA =CV_BayerBG2BGRA, + CV_BayerGR2RGBA =CV_BayerGB2BGRA, + + CV_COLORCVT_MAX = 143 }; @@ -400,7 +410,7 @@ typedef struct CvMoments double mu20, mu11, mu02, mu30, mu21, mu12, mu03; /**< central moments */ double inv_sqrt_m00; /**< m00 != 0 ? 1/sqrt(m00) : 0 */ -#ifdef __cplusplus +#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) CvMoments(){} CvMoments(const cv::Moments& m) { @@ -420,6 +430,36 @@ typedef struct CvMoments } CvMoments; +#ifdef __cplusplus +} // extern "C" + +CV_INLINE CvMoments cvMoments() +{ +#if !defined(CV__ENABLE_C_API_CTORS) + CvMoments self = CV_STRUCT_INITIALIZER; return self; +#else + return CvMoments(); +#endif +} + +CV_INLINE CvMoments cvMoments(const cv::Moments& m) +{ +#if !defined(CV__ENABLE_C_API_CTORS) + double am00 = std::abs(m.m00); + CvMoments self = { + m.m00, m.m10, m.m01, m.m20, m.m11, m.m02, m.m30, m.m21, m.m12, m.m03, + m.mu20, m.mu11, m.mu02, m.mu30, m.mu21, m.mu12, m.mu03, + am00 > DBL_EPSILON ? 1./std::sqrt(am00) : 0 + }; + return self; +#else + return CvMoments(m); +#endif +} + +extern "C" { +#endif // __cplusplus + /** Hu invariants */ typedef struct CvHuMoments { @@ -491,15 +531,8 @@ enum CV_POLY_APPROX_DP = 0 }; -/** @brief Shape matching methods - -\f$A\f$ denotes object1,\f$B\f$ denotes object2 - -\f$\begin{array}{l} m^A_i = \mathrm{sign} (h^A_i) \cdot \log{h^A_i} \\ m^B_i = \mathrm{sign} (h^B_i) \cdot \log{h^B_i} \end{array}\f$ - -and \f$h^A_i, h^B_i\f$ are the Hu moments of \f$A\f$ and \f$B\f$ , respectively. -*/ -enum ShapeMatchModes +/** Shape matching methods */ +enum { CV_CONTOURS_MATCH_I1 =1, //!< \f[I_1(A,B) = \sum _{i=1...7} \left | \frac{1}{m^A_i} - \frac{1}{m^B_i} \right |\f] CV_CONTOURS_MATCH_I2 =2, //!< \f[I_2(A,B) = \sum _{i=1...7} \left | m^A_i - m^B_i \right |\f] diff --git a/IPL/include/opencv/opencv2/intensity_transform.hpp b/IPL/include/opencv/opencv2/intensity_transform.hpp new file mode 100644 index 0000000..12447f5 --- /dev/null +++ b/IPL/include/opencv/opencv2/intensity_transform.hpp @@ -0,0 +1,78 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_INTENSITY_TRANSFORM_H +#define OPENCV_INTENSITY_TRANSFORM_H + +#include "opencv2/core.hpp" + +/** + * @defgroup intensity_transform The module brings implementations of intensity transformation algorithms to adjust image contrast. + * + * Namespace for all functions is cv::intensity_trasnform. + * + * ### Supported Algorithms + * - Autoscaling + * - Log Transformations + * - Power-Law (Gamma) Transformations + * - Contrast Stretching + * + * Reference from following book and websites: + * - Digital Image Processing 4th Edition Chapter 3 [Rafael C. Gonzalez, Richard E. Woods] @cite Gonzalez2018 + * - http://www.cs.uregina.ca/Links/class-info/425/Lab3/ @cite lcs435lab + * - https://theailearner.com/2019/01/30/contrast-stretching/ @cite theailearner +*/ + +namespace cv { +namespace intensity_transform { + +//! @addtogroup intensity_transform +//! @{ + +/** + * @brief Given an input bgr or grayscale image and constant c, apply log transformation to the image + * on domain [0, 255] and return the resulting image. + * + * @param input input bgr or grayscale image. + * @param output resulting image of log transformations. +*/ +CV_EXPORTS_W void logTransform(const Mat input, Mat& output); + +/** + * @brief Given an input bgr or grayscale image and constant gamma, apply power-law transformation, + * a.k.a. gamma correction to the image on domain [0, 255] and return the resulting image. + * + * @param input input bgr or grayscale image. + * @param output resulting image of gamma corrections. + * @param gamma constant in c*r^gamma where r is pixel value. +*/ +CV_EXPORTS_W void gammaCorrection(const Mat input, Mat& output, const float gamma); + +/** + * @brief Given an input bgr or grayscale image, apply autoscaling on domain [0, 255] to increase + * the contrast of the input image and return the resulting image. + * + * @param input input bgr or grayscale image. + * @param output resulting image of autoscaling. +*/ +CV_EXPORTS_W void autoscaling(const Mat input, Mat& output); + +/** + * @brief Given an input bgr or grayscale image, apply linear contrast stretching on domain [0, 255] + * and return the resulting image. + * + * @param input input bgr or grayscale image. + * @param output resulting image of contrast stretching. + * @param r1 x coordinate of first point (r1, s1) in the transformation function. + * @param s1 y coordinate of first point (r1, s1) in the transformation function. + * @param r2 x coordinate of second point (r2, s2) in the transformation function. + * @param s2 y coordinate of second point (r2, s2) in the transformation function. +*/ +CV_EXPORTS_W void contrastStretching(const Mat input, Mat& output, const int r1, const int s1, const int r2, const int s2); + +//! @} + +}} // cv::intensity_transform:: + +#endif \ No newline at end of file diff --git a/IPL/include/opencv/opencv2/line_descriptor/descriptor.hpp b/IPL/include/opencv/opencv2/line_descriptor/descriptor.hpp index 65c4395..15e8fe7 100644 --- a/IPL/include/opencv/opencv2/line_descriptor/descriptor.hpp +++ b/IPL/include/opencv/opencv2/line_descriptor/descriptor.hpp @@ -56,10 +56,7 @@ #include #include "opencv2/core/utility.hpp" -//#include "opencv2/core/private.hpp" #include -#include -#include #include "opencv2/core.hpp" /* define data types */ @@ -102,73 +99,73 @@ Apart from fields inspired to KeyPoint class, KeyLines stores information about original image and in octave it was extracted from, about line's length and number of pixels it covers. */ -struct CV_EXPORTS KeyLine +struct CV_EXPORTS_W_SIMPLE KeyLine { public: /** orientation of the line */ - float angle; + CV_PROP_RW float angle; /** object ID, that can be used to cluster keylines by the line they represent */ - int class_id; + CV_PROP_RW int class_id; /** octave (pyramid layer), from which the keyline has been extracted */ - int octave; + CV_PROP_RW int octave; /** coordinates of the middlepoint */ - Point2f pt; + CV_PROP_RW Point2f pt; /** the response, by which the strongest keylines have been selected. It's represented by the ratio between line's length and maximum between image's width and height */ - float response; + CV_PROP_RW float response; /** minimum area containing line */ - float size; + CV_PROP_RW float size; /** lines's extremes in original image */ - float startPointX; - float startPointY; - float endPointX; - float endPointY; + CV_PROP_RW float startPointX; + CV_PROP_RW float startPointY; + CV_PROP_RW float endPointX; + CV_PROP_RW float endPointY; /** line's extremes in image it was extracted from */ - float sPointInOctaveX; - float sPointInOctaveY; - float ePointInOctaveX; - float ePointInOctaveY; + CV_PROP_RW float sPointInOctaveX; + CV_PROP_RW float sPointInOctaveY; + CV_PROP_RW float ePointInOctaveX; + CV_PROP_RW float ePointInOctaveY; /** the length of line */ - float lineLength; + CV_PROP_RW float lineLength; /** number of pixels covered by the line */ - int numOfPixels; + CV_PROP_RW int numOfPixels; /** Returns the start point of the line in the original image */ - Point2f getStartPoint() const + CV_WRAP Point2f getStartPoint() const { return Point2f(startPointX, startPointY); } /** Returns the end point of the line in the original image */ - Point2f getEndPoint() const + CV_WRAP Point2f getEndPoint() const { return Point2f(endPointX, endPointY); } /** Returns the start point of the line in the octave it was extracted from */ - Point2f getStartPointInOctave() const + CV_WRAP Point2f getStartPointInOctave() const { return Point2f(sPointInOctaveX, sPointInOctaveY); } /** Returns the end point of the line in the octave it was extracted from */ - Point2f getEndPointInOctave() const + CV_WRAP Point2f getEndPointInOctave() const { return Point2f(ePointInOctaveX, ePointInOctaveY); } /** constructor */ - KeyLine() + CV_WRAP KeyLine() { } }; @@ -180,7 +177,7 @@ Class' interface is mainly based on the ones of classical detectors and extracto Feature2d's @ref features2d_main and @ref features2d_match. Retrieved information about lines is stored in line_descriptor::KeyLine objects. */ -class CV_EXPORTS BinaryDescriptor : public Algorithm +class CV_EXPORTS_W BinaryDescriptor : public Algorithm { public: @@ -217,14 +214,14 @@ class CV_EXPORTS BinaryDescriptor : public Algorithm @param parameters configuration parameters BinaryDescriptor::Params If no argument is provided, constructor sets default values (see comments in the code snippet in - previous section). Default values are strongly reccomended. + previous section). Default values are strongly recommended. */ BinaryDescriptor( const BinaryDescriptor::Params ¶meters = BinaryDescriptor::Params() ); /** @brief Create a BinaryDescriptor object with default parameters (or with the ones provided) and return a smart pointer to it */ - static Ptr createBinaryDescriptor(); + CV_WRAP static Ptr createBinaryDescriptor(); static Ptr createBinaryDescriptor( Params parameters ); /** destructor */ @@ -232,37 +229,37 @@ class CV_EXPORTS BinaryDescriptor : public Algorithm /** @brief Get current number of octaves */ - int getNumOfOctaves();/*CV_WRAP*/ + CV_WRAP int getNumOfOctaves(); /** @brief Set number of octaves @param octaves number of octaves */ - void setNumOfOctaves( int octaves );/*CV_WRAP*/ + CV_WRAP void setNumOfOctaves( int octaves ); /** @brief Get current width of bands */ - int getWidthOfBand();/*CV_WRAP*/ + CV_WRAP int getWidthOfBand(); /** @brief Set width of bands @param width width of bands */ - void setWidthOfBand( int width );/*CV_WRAP*/ + CV_WRAP void setWidthOfBand( int width ); /** @brief Get current reduction ratio (used in Gaussian pyramids) */ - int getReductionRatio();/*CV_WRAP*/ + CV_WRAP int getReductionRatio(); /** @brief Set reduction ratio (used in Gaussian pyramids) @param rRatio reduction ratio */ - void setReductionRatio( int rRatio ); + CV_WRAP void setReductionRatio( int rRatio ); /** @brief Read parameters from a FileNode object and store them @param fn source FileNode file */ - virtual void read( const cv::FileNode& fn ); + virtual void read( const cv::FileNode& fn ) CV_OVERRIDE; /** @brief Store parameters to a FileStorage object @param fs output FileStorage file */ - virtual void write( cv::FileStorage& fs ) const; + virtual void write( cv::FileStorage& fs ) const CV_OVERRIDE; /** @brief Requires line detection @@ -270,7 +267,7 @@ class CV_EXPORTS BinaryDescriptor : public Algorithm @param keypoints vector that will store extracted lines for one or more images @param mask mask matrix to detect only KeyLines of interest */ - void detect( const Mat& image, CV_OUT std::vector& keypoints, const Mat& mask = Mat() ); + CV_WRAP void detect( const Mat& image, CV_OUT std::vector& keypoints, const Mat& mask = Mat() ); /** @overload @@ -288,7 +285,7 @@ class CV_EXPORTS BinaryDescriptor : public Algorithm @param descriptors @param returnFloatDescr flag (when set to true, original non-binary descriptors are returned) */ - void compute( const Mat& image, CV_OUT CV_IN_OUT std::vector& keylines, CV_OUT Mat& descriptors, bool returnFloatDescr = false ) const; + CV_WRAP void compute( const Mat& image, CV_OUT CV_IN_OUT std::vector& keylines, CV_OUT Mat& descriptors, bool returnFloatDescr = false ) const; /** @overload @@ -404,6 +401,12 @@ class CV_EXPORTS BinaryDescriptor : public Algorithm unsigned int octaveCount; //the decriptor of line std::vector descriptor; + + OctaveSingleLine() : startPointX(0), startPointY(0), endPointX(0), endPointY(0), + sPointInOctaveX(0), sPointInOctaveY(0), ePointInOctaveX(0), ePointInOctaveY(0), + direction(0), salience(0), lineLength(0), numOfPixels(0), octaveCount(0), + descriptor(std::vector()) + {} }; struct Pixel @@ -429,15 +432,15 @@ class CV_EXPORTS BinaryDescriptor : public Algorithm typedef std::list PixelChain; //each edge is a pixel chain - struct EDLineParam + struct CV_EXPORTS_W_SIMPLE EDLineParam { - int ksize; - float sigma; - float gradientThreshold; - float anchorThreshold; - int scanIntervals; - int minLineLen; - double lineFitErrThreshold; + CV_PROP_RW int ksize; + CV_PROP_RW float sigma; + CV_PROP_RW float gradientThreshold; + CV_PROP_RW float anchorThreshold; + CV_PROP_RW int scanIntervals; + CV_PROP_RW int minLineLen; + CV_PROP_RW double lineFitErrThreshold; }; #define RELATIVE_ERROR_FACTOR 100.0 @@ -452,13 +455,19 @@ class CV_EXPORTS BinaryDescriptor : public Algorithm * PS: The linking step of edge detection has a little bit difference with the Edge drawing algorithm * described in the paper. The edge chain doesn't stop when the pixel direction is changed. */ - class EDLineDetector + class CV_EXPORTS_W EDLineDetector { public: - EDLineDetector(); - EDLineDetector( EDLineParam param ); + CV_WRAP EDLineDetector(); + CV_WRAP_AS(EDLineDetectorWithParams) EDLineDetector( EDLineParam param ); ~EDLineDetector(); + /** @brief Creates an EDLineDetector object, using smart pointers. + */ + CV_WRAP static Ptr createEDLineDetector(); + + + CV_WRAP_AS(createEDLineDetectorWithParams) static Ptr createEDLineDetector(EDLineParam params); /*extract edges from image *image: In, gray image; *edges: Out, store the edges, each edge is a pixel chain @@ -474,7 +483,7 @@ class CV_EXPORTS BinaryDescriptor : public Algorithm int EDline( cv::Mat &image, LineChains &lines ); /** extract line from image, and store them */ - int EDline( cv::Mat &image ); + CV_WRAP int EDline( cv::Mat &image ); cv::Mat dxImg_; //store the dxImg; @@ -619,211 +628,6 @@ class CV_EXPORTS BinaryDescriptor : public Algorithm cv::Mat_ tempVecLineFit; //the vector used in line fit function; - /** Compare doubles by relative error. - The resulting rounding error after floating point computations - depend on the specific operations done. The same number computed by - different algorithms could present different rounding errors. For a - useful comparison, an estimation of the relative rounding error - should be considered and compared to a factor times EPS. The factor - should be related to the accumulated rounding error in the chain of - computation. Here, as a simplification, a fixed factor is used. - */ - static int double_equal( double a, double b ) - { - double abs_diff, aa, bb, abs_max; - /* trivial case */ - if( a == b ) - return true; - abs_diff = fabs( a - b ); - aa = fabs( a ); - bb = fabs( b ); - abs_max = aa > bb ? aa : bb; - - /* DBL_MIN is the smallest normalized number, thus, the smallest - number whose relative error is bounded by DBL_EPSILON. For - smaller numbers, the same quantization steps as for DBL_MIN - are used. Then, for smaller numbers, a meaningful "relative" - error should be computed by dividing the difference by DBL_MIN. */ - if( abs_max < DBL_MIN ) - abs_max = DBL_MIN; - - /* equal if relative error <= factor x eps */ - return ( abs_diff / abs_max ) <= ( RELATIVE_ERROR_FACTOR * DBL_EPSILON ); - } - - /** Computes the natural logarithm of the absolute value of - the gamma function of x using the Lanczos approximation. - See http://www.rskey.org/gamma.htm - The formula used is - @f[ - \Gamma(x) = \frac{ \sum_{n=0}^{N} q_n x^n }{ \Pi_{n=0}^{N} (x+n) } - (x+5.5)^{x+0.5} e^{-(x+5.5)} - @f] - so - @f[ - \log\Gamma(x) = \log\left( \sum_{n=0}^{N} q_n x^n \right) - + (x+0.5) \log(x+5.5) - (x+5.5) - \sum_{n=0}^{N} \log(x+n) - @f] - and - q0 = 75122.6331530, - q1 = 80916.6278952, - q2 = 36308.2951477, - q3 = 8687.24529705, - q4 = 1168.92649479, - q5 = 83.8676043424, - q6 = 2.50662827511. - */ - static double log_gamma_lanczos( double x ) - { - static double q[7] = - { 75122.6331530, 80916.6278952, 36308.2951477, 8687.24529705, 1168.92649479, 83.8676043424, 2.50662827511 }; - double a = ( x + 0.5 ) * log( x + 5.5 ) - ( x + 5.5 ); - double b = 0.0; - int n; - for ( n = 0; n < 7; n++ ) - { - a -= log( x + (double) n ); - b += q[n] * pow( x, (double) n ); - } - return a + log( b ); - } - - /** Computes the natural logarithm of the absolute value of - the gamma function of x using Windschitl method. - See http://www.rskey.org/gamma.htm - The formula used is - @f[ - \Gamma(x) = \sqrt{\frac{2\pi}{x}} \left( \frac{x}{e} - \sqrt{ x\sinh(1/x) + \frac{1}{810x^6} } \right)^x - @f] - so - @f[ - \log\Gamma(x) = 0.5\log(2\pi) + (x-0.5)\log(x) - x - + 0.5x\log\left( x\sinh(1/x) + \frac{1}{810x^6} \right). - @f] - This formula is a good approximation when x > 15. - */ - static double log_gamma_windschitl( double x ) - { - return 0.918938533204673 + ( x - 0.5 ) * log( x ) - x + 0.5 * x * log( x * sinh( 1 / x ) + 1 / ( 810.0 * pow( x, 6.0 ) ) ); - } - - /** Computes -log10(NFA). - NFA stands for Number of False Alarms: - @f[ - \mathrm{NFA} = NT \cdot B(n,k,p) - @f] - - NT - number of tests - - B(n,k,p) - tail of binomial distribution with parameters n,k and p: - @f[ - B(n,k,p) = \sum_{j=k}^n - \left(\begin{array}{c}n\\j\end{array}\right) - p^{j} (1-p)^{n-j} - @f] - The value -log10(NFA) is equivalent but more intuitive than NFA: - - -1 corresponds to 10 mean false alarms - - 0 corresponds to 1 mean false alarm - - 1 corresponds to 0.1 mean false alarms - - 2 corresponds to 0.01 mean false alarms - - ... - Used this way, the bigger the value, better the detection, - and a logarithmic scale is used. - @param n,k,p binomial parameters. - @param logNT logarithm of Number of Tests - The computation is based in the gamma function by the following - relation: - @f[ - \left(\begin{array}{c}n\\k\end{array}\right) - = \frac{ \Gamma(n+1) }{ \Gamma(k+1) \cdot \Gamma(n-k+1) }. - @f] - We use efficient algorithms to compute the logarithm of - the gamma function. - To make the computation faster, not all the sum is computed, part - of the terms are neglected based on a bound to the error obtained - (an error of 10% in the result is accepted). - */ - static double nfa( int n, int k, double p, double logNT ) - { - double tolerance = 0.1; /* an error of 10% in the result is accepted */ - double log1term, term, bin_term, mult_term, bin_tail, err, p_term; - int i; - - /* check parameters */ - if( n < 0 || k < 0 || k > n || p <= 0.0 || p >= 1.0 ) - { - std::cout << "nfa: wrong n, k or p values." << std::endl; - exit( 0 ); - } - /* trivial cases */ - if( n == 0 || k == 0 ) - return -logNT; - if( n == k ) - return -logNT - (double) n * log10( p ); - - /* probability term */ - p_term = p / ( 1.0 - p ); - - /* compute the first term of the series */ - /* - binomial_tail(n,k,p) = sum_{i=k}^n bincoef(n,i) * p^i * (1-p)^{n-i} - where bincoef(n,i) are the binomial coefficients. - But - bincoef(n,k) = gamma(n+1) / ( gamma(k+1) * gamma(n-k+1) ). - We use this to compute the first term. Actually the log of it. - */ - log1term = log_gamma( (double) n + 1.0 )- log_gamma( (double ) k + 1.0 )- log_gamma( (double ) ( n - k ) + 1.0 ) -+ (double) k * log( p ) -+ (double) ( n - k ) * log( 1.0 - p ); -term = exp( log1term ); - -/* in some cases no more computations are needed */ -if( double_equal( term, 0.0 ) ) -{ /* the first term is almost zero */ - if( (double) k > (double) n * p ) /* at begin or end of the tail? */ - return -log1term / MLN10 - logNT; /* end: use just the first term */ - else - return -logNT; /* begin: the tail is roughly 1 */ -} - -/* compute more terms if needed */ -bin_tail = term; -for ( i = k + 1; i <= n; i++ ) -{ - /* As - term_i = bincoef(n,i) * p^i * (1-p)^(n-i) - and - bincoef(n,i)/bincoef(n,i-1) = n-i+1 / i, - then, - term_i / term_i-1 = (n-i+1)/i * p/(1-p) - and - term_i = term_i-1 * (n-i+1)/i * p/(1-p). - p/(1-p) is computed only once and stored in 'p_term'. - */ - bin_term = (double) ( n - i + 1 ) / (double) i; - mult_term = bin_term * p_term; - term *= mult_term; - bin_tail += term; - if( bin_term < 1.0 ) - { - /* When bin_term<1 then mult_term_ji. - Then, the error on the binomial tail when truncated at - the i term can be bounded by a geometric series of form - term_i * sum mult_term_i^j. */ - err = term * ( ( 1.0 - pow( mult_term, (double) ( n - i + 1 ) ) ) / ( 1.0 - mult_term ) - 1.0 ); - /* One wants an error at most of tolerance*final_result, or: - tolerance * abs(-log10(bin_tail)-logNT). - Now, the error that can be accepted on bin_tail is - given by tolerance*final_result divided by the derivative - of -log10(x) when x=bin_tail. that is: - tolerance * abs(-log10(bin_tail)-logNT) / (1/bin_tail) - Finally, we truncate the tail if the error is less than: - tolerance * abs(-log10(bin_tail)-logNT) * bin_tail */ - if( err < tolerance * fabs( -log10( bin_tail ) - logNT ) * bin_tail ) - break; - } -} -return -log10( bin_tail ) - logNT; -} }; // Specifies a vector of lines. @@ -892,20 +696,49 @@ the one used in *BinaryDescriptor* class, data associated to a line's extremes i in octave it was extracted from, coincide. KeyLine's field *class_id* is used as an index to indicate the order of extraction of a line inside a single octave. */ -class CV_EXPORTS LSDDetector : public Algorithm +struct CV_EXPORTS_W_SIMPLE LSDParam +{ + CV_PROP_RW double scale ; + CV_PROP_RW double sigma_scale; + CV_PROP_RW double quant; + CV_PROP_RW double ang_th; + CV_PROP_RW double log_eps; + CV_PROP_RW double density_th ; + CV_PROP_RW int n_bins ; + + +CV_WRAP LSDParam():scale(0.8), + sigma_scale(0.6), + quant(2.0), + ang_th(22.5), + log_eps(0), + density_th(0.7), + n_bins(1024){} + +}; + +class CV_EXPORTS_W LSDDetector : public Algorithm { public: /* constructor */ -/*CV_WRAP*/ -LSDDetector() +CV_WRAP LSDDetector() : params() +{ +} +; + +CV_WRAP_AS(LSDDetectorWithParams) LSDDetector(LSDParam _params) : params(_params) { } ; /** @brief Creates ad LSDDetector object, using smart pointers. */ -static Ptr createLSDDetector(); +CV_WRAP static Ptr createLSDDetector(); + + +CV_WRAP_AS(createLSDDetectorWithParams) static Ptr createLSDDetector(LSDParam params); + /** @brief Detect lines inside an image. @@ -915,7 +748,7 @@ static Ptr createLSDDetector(); @param numOctaves number of octaves inside pyramid @param mask mask matrix to detect only KeyLines of interest */ -void detect( const Mat& image, CV_OUT std::vector& keypoints, int scale, int numOctaves, const Mat& mask = Mat() ); +CV_WRAP void detect( const Mat& image, CV_OUT std::vector& keypoints, int scale, int numOctaves, const Mat& mask = Mat() ); /** @overload @param images input images @@ -924,7 +757,7 @@ void detect( const Mat& image, CV_OUT std::vector& keypoints, int scale @param numOctaves number of octaves inside pyramid @param masks vector of mask matrices to detect only KeyLines of interest from each input image */ -void detect( const std::vector& images, std::vector >& keylines, int scale, int numOctaves, +CV_WRAP void detect( const std::vector& images, std::vector >& keylines, int scale, int numOctaves, const std::vector& masks = std::vector() ) const; private: @@ -936,6 +769,9 @@ void detectImpl( const Mat& imageSrc, std::vector& keylines, int numOct /* matrices for Gaussian pyramids */ std::vector gaussianPyrs; + +/* parameters */ +LSDParam params; }; /** @brief furnishes all functionalities for querying a dataset provided by user or internal to @@ -976,7 +812,7 @@ candidates \f$\mathcal{N}_i(\mathbf{q})\f$ is obtained. The union of sets of **q**. Then, last step of algorithm is computing the Hamming distance between **q** and each element in \f$\mathcal{N}(\mathbf{q})\f$, deleting the codes that are distant more that *r* from **q**. */ -class CV_EXPORTS BinaryDescriptorMatcher : public Algorithm +class CV_EXPORTS_W BinaryDescriptorMatcher : public Algorithm { public: @@ -988,7 +824,7 @@ or from the one internal to class @param matches vector to host retrieved matches @param mask mask to select which input descriptors must be matched to one in dataset */ -void match( const Mat& queryDescriptors, const Mat& trainDescriptors, std::vector& matches, const Mat& mask = Mat() ) const; +CV_WRAP void match( const Mat& queryDescriptors, const Mat& trainDescriptors, CV_OUT std::vector& matches, const Mat& mask = Mat() ) const; /** @overload @param queryDescriptors query descriptors @@ -997,7 +833,7 @@ void match( const Mat& queryDescriptors, const Mat& trainDescriptors, std::vecto (the *i*-th mask in vector indicates whether each input query can be matched with descriptors in dataset relative to *i*-th image) */ -void match( const Mat& queryDescriptors, std::vector& matches, const std::vector& masks = std::vector() ); +CV_WRAP_AS(matchQuery) void match( const Mat& queryDescriptors, CV_OUT std::vector& matches, const std::vector& masks = std::vector() ); /** @brief For every input query descriptor, retrieve the best *k* matching ones from a dataset provided from user or from the one internal to class @@ -1010,7 +846,7 @@ user or from the one internal to class @param compactResult flag to obtain a compact result (if true, a vector that doesn't contain any matches for a given query is not inserted in final result) */ -void knnMatch( const Mat& queryDescriptors, const Mat& trainDescriptors, std::vector >& matches, int k, const Mat& mask = Mat(), +CV_WRAP void knnMatch( const Mat& queryDescriptors, const Mat& trainDescriptors, CV_OUT std::vector >& matches, int k, const Mat& mask = Mat(), bool compactResult = false ) const; /** @overload @@ -1023,7 +859,7 @@ dataset relative to *i*-th image) @param compactResult flag to obtain a compact result (if true, a vector that doesn't contain any matches for a given query is not inserted in final result) */ -void knnMatch( const Mat& queryDescriptors, std::vector >& matches, int k, const std::vector& masks = std::vector(), +CV_WRAP_AS(knnMatchQuery) void knnMatch( const Mat& queryDescriptors, std::vector >& matches, int k, const std::vector& masks = std::vector(), bool compactResult = false ); /** @brief For every input query descriptor, retrieve, from a dataset provided from user or from the one @@ -1076,13 +912,13 @@ static Ptr createBinaryDescriptorMatcher(); /** @brief Clear dataset and internal data */ -void clear(); +void clear() CV_OVERRIDE; /** @brief Constructor. The BinaryDescriptorMatcher constructed is able to store and manage 256-bits long entries. */ -BinaryDescriptorMatcher(); +CV_WRAP BinaryDescriptorMatcher(); /** destructor */ ~BinaryDescriptorMatcher() @@ -1095,7 +931,7 @@ class BucketGroup public: /** constructor */ -BucketGroup(); +BucketGroup(bool needAllocateGroup = true); /** destructor */ ~BucketGroup(); @@ -1126,7 +962,7 @@ class SparseHashtable static const int MAX_B; /** Bins (each bin is an Array object for duplicates of the same key) */ -BucketGroup *table; +std::vector table; public: @@ -1172,12 +1008,15 @@ length = 0; /** constructor setting sequence's length */ bitarray( UINT64 _bits ) { +arr = NULL; init( _bits ); } /** initializer of private fields */ void init( UINT64 _bits ) { +if( arr ) +delete[] arr; length = (UINT32) ceil( _bits / 32.00 ); arr = new UINT32[length]; erase(); @@ -1248,13 +1087,13 @@ UINT64 N; cv::Mat codes; /** Counter for eliminating duplicate results (it is not thread safe) */ -bitarray *counter; +Ptr counter; /** Array of m hashtables */ -SparseHashtable *H; +std::vector H; /** Volume of a b-bit Hamming ball with radius s (for s = 0 to d) */ -UINT32 *xornum; +std::vector xornum; /** Used within generation of binary codes at a certain Hamming distance */ int power[100]; @@ -1293,7 +1132,7 @@ Mat descriptorsMat; std::map indexesMap; /** internal MiHaser representing dataset */ -Mihasher* dataset; +Ptr dataset; /** index from which next added descriptors' bunch must begin */ int nextAddedIndex; @@ -1311,9 +1150,9 @@ int descrInDS; -------------------------------------------------------------------------------------------- */ /** struct for drawing options */ -struct CV_EXPORTS DrawLinesMatchesFlags +struct CV_EXPORTS_W_SIMPLE DrawLinesMatchesFlags { -enum +CV_PROP_RW enum { DEFAULT = 0, //!< Output image matrix will be created (Mat::create), //!< i.e. existing memory of output image may be reused. @@ -1342,10 +1181,10 @@ NOT_DRAW_SINGLE_LINES = 2//!< Single keylines will not be drawn. @note If both *matchColor* and *singleLineColor* are set to their default values, function draws matched lines and line connecting them with same color */ -CV_EXPORTS void drawLineMatches( const Mat& img1, const std::vector& keylines1, const Mat& img2, const std::vector& keylines2, - const std::vector& matches1to2, Mat& outImg, const Scalar& matchColor = Scalar::all( -1 ), - const Scalar& singleLineColor = Scalar::all( -1 ), const std::vector& matchesMask = std::vector(), - int flags = DrawLinesMatchesFlags::DEFAULT ); +CV_EXPORTS_W void drawLineMatches( const Mat& img1, const std::vector& keylines1, const Mat& img2, const std::vector& keylines2, + const std::vector& matches1to2, CV_OUT Mat& outImg, const Scalar& matchColor = Scalar::all( -1 ), + const Scalar& singleLineColor = Scalar::all( -1 ), const std::vector& matchesMask = std::vector(), + int flags = DrawLinesMatchesFlags::DEFAULT ); /** @brief Draws keylines. @@ -1355,8 +1194,8 @@ CV_EXPORTS void drawLineMatches( const Mat& img1, const std::vector& ke @param color color of lines to be drawn (if set to defaul value, color is chosen randomly) @param flags drawing flags */ -CV_EXPORTS void drawKeylines( const Mat& image, const std::vector& keylines, Mat& outImage, const Scalar& color = Scalar::all( -1 ), - int flags = DrawLinesMatchesFlags::DEFAULT ); +CV_EXPORTS_W void drawKeylines( const Mat& image, const std::vector& keylines, CV_OUT Mat& outImage, const Scalar& color = Scalar::all( -1 ), + int flags = DrawLinesMatchesFlags::DEFAULT ); //! @} diff --git a/IPL/include/opencv/opencv2/ml.hpp b/IPL/include/opencv/opencv2/ml.hpp index 0b90269..7fdb460 100644 --- a/IPL/include/opencv/opencv2/ml.hpp +++ b/IPL/include/opencv/opencv2/ml.hpp @@ -41,8 +41,8 @@ // //M*/ -#ifndef __OPENCV_ML_HPP__ -#define __OPENCV_ML_HPP__ +#ifndef OPENCV_ML_HPP +#define OPENCV_ML_HPP #ifdef __cplusplus # include "opencv2/core.hpp" @@ -104,7 +104,7 @@ enum SampleTypes It is used for optimizing statmodel accuracy by varying model parameters, the accuracy estimate being computed by cross-validation. */ -class CV_EXPORTS ParamGrid +class CV_EXPORTS_W ParamGrid { public: /** @brief Default constructor */ @@ -112,17 +112,25 @@ class CV_EXPORTS ParamGrid /** @brief Constructor with parameters */ ParamGrid(double _minVal, double _maxVal, double _logStep); - double minVal; //!< Minimum value of the statmodel parameter. Default value is 0. - double maxVal; //!< Maximum value of the statmodel parameter. Default value is 0. + CV_PROP_RW double minVal; //!< Minimum value of the statmodel parameter. Default value is 0. + CV_PROP_RW double maxVal; //!< Maximum value of the statmodel parameter. Default value is 0. /** @brief Logarithmic step for iterating the statmodel parameter. The grid determines the following iteration sequence of the statmodel parameter values: \f[(minVal, minVal*step, minVal*{step}^2, \dots, minVal*{logStep}^n),\f] where \f$n\f$ is the maximal index satisfying \f[\texttt{minVal} * \texttt{logStep} ^n < \texttt{maxVal}\f] - The grid is logarithmic, so logStep must always be greater then 1. Default value is 1. + The grid is logarithmic, so logStep must always be greater than 1. Default value is 1. */ - double logStep; + CV_PROP_RW double logStep; + + /** @brief Creates a ParamGrid Ptr that can be given to the %SVM::trainAuto method + + @param minVal minimum value of the parameter grid + @param maxVal maximum value of the parameter grid + @param logstep Logarithmic step for iterating the statmodel parameter + */ + CV_WRAP static Ptr create(double minVal=0., double maxVal=0., double logstep=1.); }; /** @brief Class encapsulating training data. @@ -190,6 +198,7 @@ class CV_EXPORTS_W TrainData CV_WRAP virtual Mat getTestSampleWeights() const = 0; CV_WRAP virtual Mat getVarIdx() const = 0; CV_WRAP virtual Mat getVarType() const = 0; + CV_WRAP virtual Mat getVarSymbolFlags() const = 0; CV_WRAP virtual int getResponseType() const = 0; CV_WRAP virtual Mat getTrainSampleIdx() const = 0; CV_WRAP virtual Mat getTestSampleIdx() const = 0; @@ -225,9 +234,23 @@ class CV_EXPORTS_W TrainData CV_WRAP virtual void shuffleTrainTest() = 0; /** @brief Returns matrix of test samples */ - CV_WRAP Mat getTestSamples() const; + CV_WRAP virtual Mat getTestSamples() const = 0; + + /** @brief Returns vector of symbolic names captured in loadFromCSV() */ + CV_WRAP virtual void getNames(std::vector& names) const = 0; + + /** @brief Extract from 1D vector elements specified by passed indexes. + @param vec input vector (supported types: CV_32S, CV_32F, CV_64F) + @param idx 1D index vector + */ + static CV_WRAP Mat getSubVector(const Mat& vec, const Mat& idx); - CV_WRAP static Mat getSubVector(const Mat& vec, const Mat& idx); + /** @brief Extract from matrix rows/cols specified by passed indexes. + @param matrix input matrix (supported types: CV_32S, CV_32F, CV_64F) + @param idx 1D index vector + @param layout specifies to extract rows (cv::ml::ROW_SAMPLES) or to extract columns (cv::ml::COL_SAMPLES) + */ + static CV_WRAP Mat getSubMatrix(const Mat& matrix, const Mat& idx, int layout); /** @brief Reads the dataset from a .csv file and returns the ready-to-use training data. @@ -306,7 +329,7 @@ class CV_EXPORTS_W StatModel : public Algorithm /** @brief Returns the number of variables in training samples */ CV_WRAP virtual int getVarCount() const = 0; - CV_WRAP virtual bool empty() const; + CV_WRAP virtual bool empty() const CV_OVERRIDE; /** @brief Returns true if the model is trained */ CV_WRAP virtual bool isTrained() const = 0; @@ -389,6 +412,17 @@ class CV_EXPORTS_W NormalBayesClassifier : public StatModel /** Creates empty model Use StatModel::train to train the model after creation. */ CV_WRAP static Ptr create(); + + /** @brief Loads and creates a serialized NormalBayesClassifier from a file + * + * Use NormalBayesClassifier::save to serialize and store an NormalBayesClassifier to disk. + * Load the NormalBayesClassifier from this file again, by calling this function with the path to the file. + * Optionally specify the node for the file containing the classifier + * + * @param filepath path to serialized NormalBayesClassifier + * @param nodeName name of node containing the classifier + */ + CV_WRAP static Ptr load(const String& filepath , const String& nodeName = String()); }; /****************************************************************************************\ @@ -471,6 +505,14 @@ class CV_EXPORTS_W KNearest : public StatModel The static method creates empty %KNearest classifier. It should be then trained using StatModel::train method. */ CV_WRAP static Ptr create(); + /** @brief Loads and creates a serialized knearest from a file + * + * Use KNearest::save to serialize and store an KNearest to disk. + * Load the KNearest from this file again, by calling this function with the path to the file. + * + * @param filepath path to serialized KNearest + */ + CV_WRAP static Ptr load(const String& filepath); }; /****************************************************************************************\ @@ -668,14 +710,54 @@ class CV_EXPORTS_W SVM : public StatModel the usual %SVM with parameters specified in params is executed. */ virtual bool trainAuto( const Ptr& data, int kFold = 10, - ParamGrid Cgrid = SVM::getDefaultGrid(SVM::C), - ParamGrid gammaGrid = SVM::getDefaultGrid(SVM::GAMMA), - ParamGrid pGrid = SVM::getDefaultGrid(SVM::P), - ParamGrid nuGrid = SVM::getDefaultGrid(SVM::NU), - ParamGrid coeffGrid = SVM::getDefaultGrid(SVM::COEF), - ParamGrid degreeGrid = SVM::getDefaultGrid(SVM::DEGREE), + ParamGrid Cgrid = getDefaultGrid(C), + ParamGrid gammaGrid = getDefaultGrid(GAMMA), + ParamGrid pGrid = getDefaultGrid(P), + ParamGrid nuGrid = getDefaultGrid(NU), + ParamGrid coeffGrid = getDefaultGrid(COEF), + ParamGrid degreeGrid = getDefaultGrid(DEGREE), bool balanced=false) = 0; + /** @brief Trains an %SVM with optimal parameters + + @param samples training samples + @param layout See ml::SampleTypes. + @param responses vector of responses associated with the training samples. + @param kFold Cross-validation parameter. The training set is divided into kFold subsets. One + subset is used to test the model, the others form the train set. So, the %SVM algorithm is + @param Cgrid grid for C + @param gammaGrid grid for gamma + @param pGrid grid for p + @param nuGrid grid for nu + @param coeffGrid grid for coeff + @param degreeGrid grid for degree + @param balanced If true and the problem is 2-class classification then the method creates more + balanced cross-validation subsets that is proportions between classes in subsets are close + to such proportion in the whole train dataset. + + The method trains the %SVM model automatically by choosing the optimal parameters C, gamma, p, + nu, coef0, degree. Parameters are considered optimal when the cross-validation + estimate of the test set error is minimal. + + This function only makes use of SVM::getDefaultGrid for parameter optimization and thus only + offers rudimentary parameter options. + + This function works for the classification (SVM::C_SVC or SVM::NU_SVC) as well as for the + regression (SVM::EPS_SVR or SVM::NU_SVR). If it is SVM::ONE_CLASS, no optimization is made and + the usual %SVM with parameters specified in params is executed. + */ + CV_WRAP virtual bool trainAuto(InputArray samples, + int layout, + InputArray responses, + int kFold = 10, + Ptr Cgrid = SVM::getDefaultGridPtr(SVM::C), + Ptr gammaGrid = SVM::getDefaultGridPtr(SVM::GAMMA), + Ptr pGrid = SVM::getDefaultGridPtr(SVM::P), + Ptr nuGrid = SVM::getDefaultGridPtr(SVM::NU), + Ptr coeffGrid = SVM::getDefaultGridPtr(SVM::COEF), + Ptr degreeGrid = SVM::getDefaultGridPtr(SVM::DEGREE), + bool balanced=false) = 0; + /** @brief Retrieves all the support vectors The method returns all the support vectors as a floating-point matrix, where support vectors are @@ -689,7 +771,7 @@ class CV_EXPORTS_W SVM : public StatModel support vector, used for prediction, was derived from. They are returned in a floating-point matrix, where the support vectors are stored as matrix rows. */ - CV_WRAP Mat getUncompressedSupportVectors() const; + CV_WRAP virtual Mat getUncompressedSupportVectors() const = 0; /** @brief Retrieves the decision function @@ -718,6 +800,16 @@ class CV_EXPORTS_W SVM : public StatModel */ static ParamGrid getDefaultGrid( int param_id ); + /** @brief Generates a grid for %SVM parameters. + + @param param_id %SVM parameters IDs that must be one of the SVM::ParamTypes. The grid is + generated for the parameter with this ID. + + The function generates a grid pointer for the specified parameter of the %SVM algorithm. + The grid may be passed to the function SVM::trainAuto. + */ + CV_WRAP static Ptr getDefaultGridPtr( int param_id ); + /** Creates empty model. Use StatModel::train to train the model. Since %SVM has several parameters, you may want to find the best parameters for your problem, it can be done with SVM::trainAuto. */ @@ -814,6 +906,15 @@ class CV_EXPORTS_W EM : public StatModel */ CV_WRAP virtual void getCovs(CV_OUT std::vector& covs) const = 0; + /** @brief Returns posterior probabilities for the provided samples + + @param samples The input samples, floating-point matrix + @param results The optional output \f$ nSamples \times nClusters\f$ matrix of results. It contains + posterior probabilities for each sample from the input + @param flags This parameter will be ignored + */ + CV_WRAP virtual float predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const CV_OVERRIDE = 0; + /** @brief Returns a likelihood logarithm value and an index of the most probable mixture component for the given sample. @@ -903,7 +1004,7 @@ class CV_EXPORTS_W EM : public StatModel @param samples Samples from which the Gaussian mixture model will be estimated. It should be a one-channel matrix, each row of which is a sample. If the matrix does not have CV_64F type it will be converted to the inner matrix of such type for the further computing. - @param probs0 + @param probs0 the probabilities @param logLikelihoods The optional output matrix that contains a likelihood logarithm value for each sample. It has \f$nsamples \times 1\f$ size and CV_64FC1 type. @param labels The optional output "class label" for each sample: @@ -923,6 +1024,17 @@ class CV_EXPORTS_W EM : public StatModel can use one of the EM::train\* methods or load it from file using Algorithm::load\(filename). */ CV_WRAP static Ptr create(); + + /** @brief Loads and creates a serialized EM from a file + * + * Use EM::save to serialize and store an EM to disk. + * Load the EM from this file again, by calling this function with the path to the file. + * Optionally specify the node for the file containing the classifier + * + * @param filepath path to serialized EM + * @param nodeName name of node containing the classifier + */ + CV_WRAP static Ptr load(const String& filepath , const String& nodeName = String()); }; /****************************************************************************************\ @@ -1111,6 +1223,17 @@ class CV_EXPORTS_W DTrees : public StatModel file using Algorithm::load\(filename). */ CV_WRAP static Ptr create(); + + /** @brief Loads and creates a serialized DTrees from a file + * + * Use DTree::save to serialize and store an DTree to disk. + * Load the DTree from this file again, by calling this function with the path to the file. + * Optionally specify the node for the file containing the classifier + * + * @param filepath path to serialized DTree + * @param nodeName name of node containing the classifier + */ + CV_WRAP static Ptr load(const String& filepath , const String& nodeName = String()); }; /****************************************************************************************\ @@ -1160,11 +1283,33 @@ class CV_EXPORTS_W RTrees : public DTrees */ CV_WRAP virtual Mat getVarImportance() const = 0; + /** Returns the result of each individual tree in the forest. + In case the model is a regression problem, the method will return each of the trees' + results for each of the sample cases. If the model is a classifier, it will return + a Mat with samples + 1 rows, where the first row gives the class number and the + following rows return the votes each class had for each sample. + @param samples Array containing the samples for which votes will be calculated. + @param results Array where the result of the calculation will be written. + @param flags Flags for defining the type of RTrees. + */ + CV_WRAP virtual void getVotes(InputArray samples, OutputArray results, int flags) const = 0; + /** Creates the empty model. Use StatModel::train to train the model, StatModel::train to create and train the model, Algorithm::load to load the pre-trained model. */ CV_WRAP static Ptr create(); + + /** @brief Loads and creates a serialized RTree from a file + * + * Use RTree::save to serialize and store an RTree to disk. + * Load the RTree from this file again, by calling this function with the path to the file. + * Optionally specify the node for the file containing the classifier + * + * @param filepath path to serialized RTree + * @param nodeName name of node containing the classifier + */ + CV_WRAP static Ptr load(const String& filepath , const String& nodeName = String()); }; /****************************************************************************************\ @@ -1214,6 +1359,17 @@ class CV_EXPORTS_W Boost : public DTrees /** Creates the empty model. Use StatModel::train to train the model, Algorithm::load\(filename) to load the pre-trained model. */ CV_WRAP static Ptr create(); + + /** @brief Loads and creates a serialized Boost from a file + * + * Use Boost::save to serialize and store an RTree to disk. + * Load the Boost from this file again, by calling this function with the path to the file. + * Optionally specify the node for the file containing the classifier + * + * @param filepath path to serialized Boost + * @param nodeName name of node containing the classifier + */ + CV_WRAP static Ptr load(const String& filepath , const String& nodeName = String()); }; /****************************************************************************************\ @@ -1269,13 +1425,14 @@ class CV_EXPORTS_W ANN_MLP : public StatModel /** Available training methods */ enum TrainingMethods { BACKPROP=0, //!< The back-propagation algorithm. - RPROP=1 //!< The RPROP algorithm. See @cite RPROP93 for details. + RPROP = 1, //!< The RPROP algorithm. See @cite RPROP93 for details. + ANNEAL = 2 //!< The simulated annealing algorithm. See @cite Kirkpatrick83 for details. }; /** Sets training method and common parameters. @param method Default value is ANN_MLP::RPROP. See ANN_MLP::TrainingMethods. - @param param1 passed to setRpropDW0 for ANN_MLP::RPROP and to setBackpropWeightScale for ANN_MLP::BACKPROP - @param param2 passed to setRpropDWMin for ANN_MLP::RPROP and to setBackpropMomentumScale for ANN_MLP::BACKPROP. + @param param1 passed to setRpropDW0 for ANN_MLP::RPROP and to setBackpropWeightScale for ANN_MLP::BACKPROP and to initialT for ANN_MLP::ANNEAL. + @param param2 passed to setRpropDWMin for ANN_MLP::RPROP and to setBackpropMomentumScale for ANN_MLP::BACKPROP and to finalT for ANN_MLP::ANNEAL. */ CV_WRAP virtual void setTrainMethod(int method, double param1 = 0, double param2 = 0) = 0; @@ -1362,18 +1519,53 @@ class CV_EXPORTS_W ANN_MLP : public StatModel /** @copybrief getRpropDWMax @see getRpropDWMax */ CV_WRAP virtual void setRpropDWMax(double val) = 0; + /** ANNEAL: Update initial temperature. + It must be \>=0. Default value is 10.*/ + /** @see setAnnealInitialT */ + CV_WRAP virtual double getAnnealInitialT() const = 0; + /** @copybrief getAnnealInitialT @see getAnnealInitialT */ + CV_WRAP virtual void setAnnealInitialT(double val) = 0; + + /** ANNEAL: Update final temperature. + It must be \>=0 and less than initialT. Default value is 0.1.*/ + /** @see setAnnealFinalT */ + CV_WRAP virtual double getAnnealFinalT() const = 0; + /** @copybrief getAnnealFinalT @see getAnnealFinalT */ + CV_WRAP virtual void setAnnealFinalT(double val) = 0; + + /** ANNEAL: Update cooling ratio. + It must be \>0 and less than 1. Default value is 0.95.*/ + /** @see setAnnealCoolingRatio */ + CV_WRAP virtual double getAnnealCoolingRatio() const = 0; + /** @copybrief getAnnealCoolingRatio @see getAnnealCoolingRatio */ + CV_WRAP virtual void setAnnealCoolingRatio(double val) = 0; + + /** ANNEAL: Update iteration per step. + It must be \>0 . Default value is 10.*/ + /** @see setAnnealItePerStep */ + CV_WRAP virtual int getAnnealItePerStep() const = 0; + /** @copybrief getAnnealItePerStep @see getAnnealItePerStep */ + CV_WRAP virtual void setAnnealItePerStep(int val) = 0; + + /** @brief Set/initialize anneal RNG */ + virtual void setAnnealEnergyRNG(const RNG& rng) = 0; + /** possible activation functions */ enum ActivationFunctions { /** Identity function: \f$f(x)=x\f$ */ IDENTITY = 0, - /** Symmetrical sigmoid: \f$f(x)=\beta*(1-e^{-\alpha x})/(1+e^{-\alpha x}\f$ + /** Symmetrical sigmoid: \f$f(x)=\beta*(1-e^{-\alpha x})/(1+e^{-\alpha x})\f$ @note If you are using the default sigmoid activation function with the default parameter values fparam1=0 and fparam2=0 then the function used is y = 1.7159\*tanh(2/3 \* x), so the output will range from [-1.7159, 1.7159], instead of [0,1].*/ SIGMOID_SYM = 1, /** Gaussian function: \f$f(x)=\beta e^{-\alpha x*x}\f$ */ - GAUSSIAN = 2 + GAUSSIAN = 2, + /** ReLU function: \f$f(x)=max(0,x)\f$ */ + RELU = 3, + /** Leaky ReLU function: for x>0 \f$f(x)=x \f$ and x<=0 \f$f(x)=\alpha x \f$*/ + LEAKYRELU= 4 }; /** Train options */ @@ -1413,6 +1605,10 @@ class CV_EXPORTS_W ANN_MLP : public StatModel }; +#ifndef DISABLE_OPENCV_3_COMPATIBILITY +typedef ANN_MLP ANN_MLP_ANNEAL; +#endif + /****************************************************************************************\ * Logistic Regression * \****************************************************************************************/ @@ -1483,11 +1679,11 @@ class CV_EXPORTS_W LogisticRegression : public StatModel @param results Predicted labels as a column matrix of type CV_32S. @param flags Not used. */ - CV_WRAP virtual float predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const = 0; + CV_WRAP virtual float predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const CV_OVERRIDE = 0; - /** @brief This function returns the trained paramters arranged across rows. + /** @brief This function returns the trained parameters arranged across rows. - For a two class classifcation problem, it returns a row matrix. It returns learnt paramters of + For a two class classification problem, it returns a row matrix. It returns learnt parameters of the Logistic Regression as a matrix of type CV_32F. */ CV_WRAP virtual Mat get_learnt_thetas() const = 0; @@ -1497,10 +1693,191 @@ class CV_EXPORTS_W LogisticRegression : public StatModel Creates Logistic Regression model with parameters given. */ CV_WRAP static Ptr create(); + + /** @brief Loads and creates a serialized LogisticRegression from a file + * + * Use LogisticRegression::save to serialize and store an LogisticRegression to disk. + * Load the LogisticRegression from this file again, by calling this function with the path to the file. + * Optionally specify the node for the file containing the classifier + * + * @param filepath path to serialized LogisticRegression + * @param nodeName name of node containing the classifier + */ + CV_WRAP static Ptr load(const String& filepath , const String& nodeName = String()); }; + /****************************************************************************************\ -* Auxilary functions declarations * +* Stochastic Gradient Descent SVM Classifier * +\****************************************************************************************/ + +/*! +@brief Stochastic Gradient Descent SVM classifier + +SVMSGD provides a fast and easy-to-use implementation of the SVM classifier using the Stochastic Gradient Descent approach, +as presented in @cite bottou2010large. + +The classifier has following parameters: +- model type, +- margin type, +- margin regularization (\f$\lambda\f$), +- initial step size (\f$\gamma_0\f$), +- step decreasing power (\f$c\f$), +- and termination criteria. + +The model type may have one of the following values: \ref SGD and \ref ASGD. + +- \ref SGD is the classic version of SVMSGD classifier: every next step is calculated by the formula + \f[w_{t+1} = w_t - \gamma(t) \frac{dQ_i}{dw} |_{w = w_t}\f] + where + - \f$w_t\f$ is the weights vector for decision function at step \f$t\f$, + - \f$\gamma(t)\f$ is the step size of model parameters at the iteration \f$t\f$, it is decreased on each step by the formula + \f$\gamma(t) = \gamma_0 (1 + \lambda \gamma_0 t) ^ {-c}\f$ + - \f$Q_i\f$ is the target functional from SVM task for sample with number \f$i\f$, this sample is chosen stochastically on each step of the algorithm. + +- \ref ASGD is Average Stochastic Gradient Descent SVM Classifier. ASGD classifier averages weights vector on each step of algorithm by the formula +\f$\widehat{w}_{t+1} = \frac{t}{1+t}\widehat{w}_{t} + \frac{1}{1+t}w_{t+1}\f$ + +The recommended model type is ASGD (following @cite bottou2010large). + +The margin type may have one of the following values: \ref SOFT_MARGIN or \ref HARD_MARGIN. + +- You should use \ref HARD_MARGIN type, if you have linearly separable sets. +- You should use \ref SOFT_MARGIN type, if you have non-linearly separable sets or sets with outliers. +- In the general case (if you know nothing about linear separability of your sets), use SOFT_MARGIN. + +The other parameters may be described as follows: +- Margin regularization parameter is responsible for weights decreasing at each step and for the strength of restrictions on outliers + (the less the parameter, the less probability that an outlier will be ignored). + Recommended value for SGD model is 0.0001, for ASGD model is 0.00001. + +- Initial step size parameter is the initial value for the step size \f$\gamma(t)\f$. + You will have to find the best initial step for your problem. + +- Step decreasing power is the power parameter for \f$\gamma(t)\f$ decreasing by the formula, mentioned above. + Recommended value for SGD model is 1, for ASGD model is 0.75. + +- Termination criteria can be TermCriteria::COUNT, TermCriteria::EPS or TermCriteria::COUNT + TermCriteria::EPS. + You will have to find the best termination criteria for your problem. + +Note that the parameters margin regularization, initial step size, and step decreasing power should be positive. + +To use SVMSGD algorithm do as follows: + +- first, create the SVMSGD object. The algorithm will set optimal parameters by default, but you can set your own parameters via functions setSvmsgdType(), + setMarginType(), setMarginRegularization(), setInitialStepSize(), and setStepDecreasingPower(). + +- then the SVM model can be trained using the train features and the correspondent labels by the method train(). + +- after that, the label of a new feature vector can be predicted using the method predict(). + +@code +// Create empty object +cv::Ptr svmsgd = SVMSGD::create(); + +// Train the Stochastic Gradient Descent SVM +svmsgd->train(trainData); + +// Predict labels for the new samples +svmsgd->predict(samples, responses); +@endcode + +*/ + +class CV_EXPORTS_W SVMSGD : public cv::ml::StatModel +{ +public: + + /** SVMSGD type. + ASGD is often the preferable choice. */ + enum SvmsgdType + { + SGD, //!< Stochastic Gradient Descent + ASGD //!< Average Stochastic Gradient Descent + }; + + /** Margin type.*/ + enum MarginType + { + SOFT_MARGIN, //!< General case, suits to the case of non-linearly separable sets, allows outliers. + HARD_MARGIN //!< More accurate for the case of linearly separable sets. + }; + + /** + * @return the weights of the trained model (decision function f(x) = weights * x + shift). + */ + CV_WRAP virtual Mat getWeights() = 0; + + /** + * @return the shift of the trained model (decision function f(x) = weights * x + shift). + */ + CV_WRAP virtual float getShift() = 0; + + /** @brief Creates empty model. + * Use StatModel::train to train the model. Since %SVMSGD has several parameters, you may want to + * find the best parameters for your problem or use setOptimalParameters() to set some default parameters. + */ + CV_WRAP static Ptr create(); + + /** @brief Loads and creates a serialized SVMSGD from a file + * + * Use SVMSGD::save to serialize and store an SVMSGD to disk. + * Load the SVMSGD from this file again, by calling this function with the path to the file. + * Optionally specify the node for the file containing the classifier + * + * @param filepath path to serialized SVMSGD + * @param nodeName name of node containing the classifier + */ + CV_WRAP static Ptr load(const String& filepath , const String& nodeName = String()); + + /** @brief Function sets optimal parameters values for chosen SVM SGD model. + * @param svmsgdType is the type of SVMSGD classifier. + * @param marginType is the type of margin constraint. + */ + CV_WRAP virtual void setOptimalParameters(int svmsgdType = SVMSGD::ASGD, int marginType = SVMSGD::SOFT_MARGIN) = 0; + + /** @brief %Algorithm type, one of SVMSGD::SvmsgdType. */ + /** @see setSvmsgdType */ + CV_WRAP virtual int getSvmsgdType() const = 0; + /** @copybrief getSvmsgdType @see getSvmsgdType */ + CV_WRAP virtual void setSvmsgdType(int svmsgdType) = 0; + + /** @brief %Margin type, one of SVMSGD::MarginType. */ + /** @see setMarginType */ + CV_WRAP virtual int getMarginType() const = 0; + /** @copybrief getMarginType @see getMarginType */ + CV_WRAP virtual void setMarginType(int marginType) = 0; + + /** @brief Parameter marginRegularization of a %SVMSGD optimization problem. */ + /** @see setMarginRegularization */ + CV_WRAP virtual float getMarginRegularization() const = 0; + /** @copybrief getMarginRegularization @see getMarginRegularization */ + CV_WRAP virtual void setMarginRegularization(float marginRegularization) = 0; + + /** @brief Parameter initialStepSize of a %SVMSGD optimization problem. */ + /** @see setInitialStepSize */ + CV_WRAP virtual float getInitialStepSize() const = 0; + /** @copybrief getInitialStepSize @see getInitialStepSize */ + CV_WRAP virtual void setInitialStepSize(float InitialStepSize) = 0; + + /** @brief Parameter stepDecreasingPower of a %SVMSGD optimization problem. */ + /** @see setStepDecreasingPower */ + CV_WRAP virtual float getStepDecreasingPower() const = 0; + /** @copybrief getStepDecreasingPower @see getStepDecreasingPower */ + CV_WRAP virtual void setStepDecreasingPower(float stepDecreasingPower) = 0; + + /** @brief Termination criteria of the training algorithm. + You can specify the maximum number of iterations (maxCount) and/or how much the error could + change between the iterations to make the algorithm continue (epsilon).*/ + /** @see setTermCriteria */ + CV_WRAP virtual TermCriteria getTermCriteria() const = 0; + /** @copybrief getTermCriteria @see getTermCriteria */ + CV_WRAP virtual void setTermCriteria(const cv::TermCriteria &val) = 0; +}; + + +/****************************************************************************************\ +* Auxiliary functions declarations * \****************************************************************************************/ /** @brief Generates _sample_ from multivariate normal distribution @@ -1516,12 +1893,55 @@ CV_EXPORTS void randMVNormal( InputArray mean, InputArray cov, int nsamples, Out CV_EXPORTS void createConcentricSpheresTestSet( int nsamples, int nfeatures, int nclasses, OutputArray samples, OutputArray responses); + +/****************************************************************************************\ +* Simulated annealing solver * +\****************************************************************************************/ + +#ifdef CV_DOXYGEN +/** @brief This class declares example interface for system state used in simulated annealing optimization algorithm. + +@note This class is not defined in C++ code and can't be use directly - you need your own implementation with the same methods. +*/ +struct SimulatedAnnealingSolverSystem +{ + /** Give energy value for a state of system.*/ + double energy() const; + /** Function which change the state of system (random perturbation).*/ + void changeState(); + /** Function to reverse to the previous state. Can be called once only after changeState(). */ + void reverseState(); +}; +#endif // CV_DOXYGEN + +/** @brief The class implements simulated annealing for optimization. + +@cite Kirkpatrick83 for details + +@param solverSystem optimization system (see SimulatedAnnealingSolverSystem) +@param initialTemperature initial temperature +@param finalTemperature final temperature +@param coolingRatio temperature step multiplies +@param iterationsPerStep number of iterations per temperature changing step +@param lastTemperature optional output for last used temperature +@param rngEnergy specify custom random numbers generator (cv::theRNG() by default) +*/ +template +int simulatedAnnealingSolver(SimulatedAnnealingSolverSystem& solverSystem, + double initialTemperature, double finalTemperature, double coolingRatio, + size_t iterationsPerStep, + CV_OUT double* lastTemperature = NULL, + cv::RNG& rngEnergy = cv::theRNG() +); + //! @} ml } } +#include + #endif // __cplusplus -#endif // __OPENCV_ML_HPP__ +#endif // OPENCV_ML_HPP /* End of file. */ diff --git a/IPL/include/opencv/opencv2/ml/ml.inl.hpp b/IPL/include/opencv/opencv2/ml/ml.inl.hpp new file mode 100644 index 0000000..dc9c783 --- /dev/null +++ b/IPL/include/opencv/opencv2/ml/ml.inl.hpp @@ -0,0 +1,60 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_ML_INL_HPP +#define OPENCV_ML_INL_HPP + +namespace cv { namespace ml { + +// declared in ml.hpp +template +int simulatedAnnealingSolver(SimulatedAnnealingSolverSystem& solverSystem, + double initialTemperature, double finalTemperature, double coolingRatio, + size_t iterationsPerStep, + CV_OUT double* lastTemperature, + cv::RNG& rngEnergy +) +{ + CV_Assert(finalTemperature > 0); + CV_Assert(initialTemperature > finalTemperature); + CV_Assert(iterationsPerStep > 0); + CV_Assert(coolingRatio < 1.0f); + double Ti = initialTemperature; + double previousEnergy = solverSystem.energy(); + int exchange = 0; + while (Ti > finalTemperature) + { + for (size_t i = 0; i < iterationsPerStep; i++) + { + solverSystem.changeState(); + double newEnergy = solverSystem.energy(); + if (newEnergy < previousEnergy) + { + previousEnergy = newEnergy; + exchange++; + } + else + { + double r = rngEnergy.uniform(0.0, 1.0); + if (r < std::exp(-(newEnergy - previousEnergy) / Ti)) + { + previousEnergy = newEnergy; + exchange++; + } + else + { + solverSystem.reverseState(); + } + } + } + Ti *= coolingRatio; + } + if (lastTemperature) + *lastTemperature = Ti; + return exchange; +} + +}} //namespace + +#endif // OPENCV_ML_INL_HPP diff --git a/IPL/include/opencv/opencv2/objdetect.hpp b/IPL/include/opencv/opencv2/objdetect.hpp index 6587b3d..097c592 100644 --- a/IPL/include/opencv/opencv2/objdetect.hpp +++ b/IPL/include/opencv/opencv2/objdetect.hpp @@ -41,8 +41,8 @@ // //M*/ -#ifndef __OPENCV_OBJDETECT_HPP__ -#define __OPENCV_OBJDETECT_HPP__ +#ifndef OPENCV_OBJDETECT_HPP +#define OPENCV_OBJDETECT_HPP #include "opencv2/core.hpp" @@ -91,7 +91,7 @@ compensate for the differences in the size of areas. The sums of pixel values ov regions are calculated rapidly using integral images (see below and the integral description). To see the object detector at work, have a look at the facedetect demo: - + The following reference is for the detection part only. There is a separate application called opencv_traincascade that can train a cascade of boosted classifiers from a set of samples. @@ -124,7 +124,7 @@ class CV_EXPORTS SimilarRects SimilarRects(double _eps) : eps(_eps) {} inline bool operator()(const Rect& r1, const Rect& r2) const { - double delta = eps*(std::min(r1.width, r2.width) + std::min(r1.height, r2.height))*0.5; + double delta = eps * ((std::min)(r1.width, r2.width) + (std::min)(r1.height, r2.height)) * 0.5; return std::abs(r1.x - r2.x) <= delta && std::abs(r1.y - r2.y) <= delta && std::abs(r1.x + r1.width - r2.x - r2.width) <= delta && @@ -163,7 +163,7 @@ CV_EXPORTS void groupRectangles_meanshift(std::vector& rectList, std::ve std::vector& foundScales, double detectThreshold = 0.0, Size winDetSize = Size(64, 128)); -template<> CV_EXPORTS void DefaultDeleter::operator ()(CvHaarClassifierCascade* obj) const; +template<> struct DefaultDeleter{ CV_EXPORTS void operator ()(CvHaarClassifierCascade* obj) const; }; enum { CASCADE_DO_CANNY_PRUNING = 1, CASCADE_SCALE_IMAGE = 2, @@ -175,7 +175,7 @@ class CV_EXPORTS_W BaseCascadeClassifier : public Algorithm { public: virtual ~BaseCascadeClassifier(); - virtual bool empty() const = 0; + virtual bool empty() const CV_OVERRIDE = 0; virtual bool load( const String& filename ) = 0; virtual void detectMultiScale( InputArray image, CV_OUT std::vector& objects, @@ -215,6 +215,10 @@ class CV_EXPORTS_W BaseCascadeClassifier : public Algorithm virtual Ptr getMaskGenerator() = 0; }; +/** @example samples/cpp/facedetect.cpp +This program demonstrates usage of the Cascade classifier class +\image html Cascade_Classifier_Tutorial_Result_Haar.jpg "Sample screenshot" width=321 height=254 +*/ /** @brief Cascade classifier class for object detection. */ class CV_EXPORTS_W CascadeClassifier @@ -255,7 +259,7 @@ class CV_EXPORTS_W CascadeClassifier @param flags Parameter with the same meaning for an old cascade as in the function cvHaarDetectObjects. It is not used for a new cascade. @param minSize Minimum possible object size. Objects smaller than that are ignored. - @param maxSize Maximum possible object size. Objects larger than that are ignored. + @param maxSize Maximum possible object size. Objects larger than that are ignored. If `maxSize == minSize` model is evaluated on single scale. The function is parallelized with the TBB library. @@ -283,7 +287,7 @@ class CV_EXPORTS_W CascadeClassifier @param flags Parameter with the same meaning for an old cascade as in the function cvHaarDetectObjects. It is not used for a new cascade. @param minSize Minimum possible object size. Objects smaller than that are ignored. - @param maxSize Maximum possible object size. Objects larger than that are ignored. + @param maxSize Maximum possible object size. Objects larger than that are ignored. If `maxSize == minSize` model is evaluated on single scale. */ CV_WRAP_AS(detectMultiScale2) void detectMultiScale( InputArray image, CV_OUT std::vector& objects, @@ -294,7 +298,21 @@ class CV_EXPORTS_W CascadeClassifier Size maxSize=Size() ); /** @overload - if `outputRejectLevels` is `true` returns `rejectLevels` and `levelWeights` + This function allows you to retrieve the final stage decision certainty of classification. + For this, one needs to set `outputRejectLevels` on true and provide the `rejectLevels` and `levelWeights` parameter. + For each resulting detection, `levelWeights` will then contain the certainty of classification at the final stage. + This value can then be used to separate strong from weaker classifications. + + A code sample on how to use it efficiently can be found below: + @code + Mat img; + vector weights; + vector levels; + vector detections; + CascadeClassifier model("/path/to/your/model.xml"); + model.detectMultiScale(img, detections, levels, weights, 1.1, 3, 0, Size(), Size(), true); + cerr << "Detection " << detections[0] << " with weight " << weights[0] << endl; + @endcode */ CV_WRAP_AS(detectMultiScale3) void detectMultiScale( InputArray image, CV_OUT std::vector& objects, @@ -328,29 +346,65 @@ struct DetectionROI { //! scale(size) of the bounding box double scale; - //! set of requrested locations to be evaluated + //! set of requested locations to be evaluated std::vector locations; //! vector that will contain confidence values for each location std::vector confidences; }; +/**@brief Implementation of HOG (Histogram of Oriented Gradients) descriptor and object detector. + +the HOG descriptor algorithm introduced by Navneet Dalal and Bill Triggs @cite Dalal2005 . + +useful links: + +https://hal.inria.fr/inria-00548512/document/ + +https://en.wikipedia.org/wiki/Histogram_of_oriented_gradients + +https://software.intel.com/en-us/ipp-dev-reference-histogram-of-oriented-gradients-hog-descriptor + +http://www.learnopencv.com/histogram-of-oriented-gradients + +http://www.learnopencv.com/handwritten-digits-classification-an-opencv-c-python-tutorial + + */ struct CV_EXPORTS_W HOGDescriptor { public: - enum { L2Hys = 0 + enum HistogramNormType { L2Hys = 0 //!< Default histogramNormType }; - enum { DEFAULT_NLEVELS = 64 + enum { DEFAULT_NLEVELS = 64 //!< Default nlevels value. }; + enum DescriptorStorageFormat { DESCR_FORMAT_COL_BY_COL, DESCR_FORMAT_ROW_BY_ROW }; + /**@brief Creates the HOG descriptor and detector with default params. + + aqual to HOGDescriptor(Size(64,128), Size(16,16), Size(8,8), Size(8,8), 9 ) + */ CV_WRAP HOGDescriptor() : winSize(64,128), blockSize(16,16), blockStride(8,8), cellSize(8,8), nbins(9), derivAperture(1), winSigma(-1), histogramNormType(HOGDescriptor::L2Hys), L2HysThreshold(0.2), gammaCorrection(true), free_coef(-1.f), nlevels(HOGDescriptor::DEFAULT_NLEVELS), signedGradient(false) {} + /** @overload + @param _winSize sets winSize with given value. + @param _blockSize sets blockSize with given value. + @param _blockStride sets blockStride with given value. + @param _cellSize sets cellSize with given value. + @param _nbins sets nbins with given value. + @param _derivAperture sets derivAperture with given value. + @param _winSigma sets winSigma with given value. + @param _histogramNormType sets histogramNormType with given value. + @param _L2HysThreshold sets L2HysThreshold with given value. + @param _gammaCorrection sets gammaCorrection with given value. + @param _nlevels sets nlevels with given value. + @param _signedGradient sets signedGradient with given value. + */ CV_WRAP HOGDescriptor(Size _winSize, Size _blockSize, Size _blockStride, Size _cellSize, int _nbins, int _derivAperture=1, double _winSigma=-1, - int _histogramNormType=HOGDescriptor::L2Hys, + HOGDescriptor::HistogramNormType _histogramNormType=HOGDescriptor::L2Hys, double _L2HysThreshold=0.2, bool _gammaCorrection=false, int _nlevels=HOGDescriptor::DEFAULT_NLEVELS, bool _signedGradient=false) : winSize(_winSize), blockSize(_blockSize), blockStride(_blockStride), cellSize(_cellSize), @@ -359,108 +413,341 @@ struct CV_EXPORTS_W HOGDescriptor gammaCorrection(_gammaCorrection), free_coef(-1.f), nlevels(_nlevels), signedGradient(_signedGradient) {} + /** @overload + @param filename The file name containing HOGDescriptor properties and coefficients for the linear SVM classifier. + */ CV_WRAP HOGDescriptor(const String& filename) { load(filename); } + /** @overload + @param d the HOGDescriptor which cloned to create a new one. + */ HOGDescriptor(const HOGDescriptor& d) { d.copyTo(*this); } + /**@brief Default destructor. + */ virtual ~HOGDescriptor() {} + /**@brief Returns the number of coefficients required for the classification. + */ CV_WRAP size_t getDescriptorSize() const; + + /** @brief Checks if detector size equal to descriptor size. + */ CV_WRAP bool checkDetectorSize() const; + + /** @brief Returns winSigma value + */ CV_WRAP double getWinSigma() const; - CV_WRAP virtual void setSVMDetector(InputArray _svmdetector); + /**@example samples/cpp/peopledetect.cpp + */ + /**@brief Sets coefficients for the linear SVM classifier. + @param svmdetector coefficients for the linear SVM classifier. + */ + CV_WRAP virtual void setSVMDetector(InputArray svmdetector); + /** @brief Reads HOGDescriptor parameters from a cv::FileNode. + @param fn File node + */ virtual bool read(FileNode& fn); + + /** @brief Stores HOGDescriptor parameters in a cv::FileStorage. + @param fs File storage + @param objname Object name + */ virtual void write(FileStorage& fs, const String& objname) const; + /** @brief loads HOGDescriptor parameters and coefficients for the linear SVM classifier from a file. + @param filename Path of the file to read. + @param objname The optional name of the node to read (if empty, the first top-level node will be used). + */ CV_WRAP virtual bool load(const String& filename, const String& objname = String()); + + /** @brief saves HOGDescriptor parameters and coefficients for the linear SVM classifier to a file + @param filename File name + @param objname Object name + */ CV_WRAP virtual void save(const String& filename, const String& objname = String()) const; + + /** @brief clones the HOGDescriptor + @param c cloned HOGDescriptor + */ virtual void copyTo(HOGDescriptor& c) const; + /**@example samples/cpp/train_HOG.cpp + */ + /** @brief Computes HOG descriptors of given image. + @param img Matrix of the type CV_8U containing an image where HOG features will be calculated. + @param descriptors Matrix of the type CV_32F + @param winStride Window stride. It must be a multiple of block stride. + @param padding Padding + @param locations Vector of Point + */ CV_WRAP virtual void compute(InputArray img, CV_OUT std::vector& descriptors, Size winStride = Size(), Size padding = Size(), const std::vector& locations = std::vector()) const; - //! with found weights output - CV_WRAP virtual void detect(const Mat& img, CV_OUT std::vector& foundLocations, + /** @brief Performs object detection without a multi-scale window. + @param img Matrix of the type CV_8U or CV_8UC3 containing an image where objects are detected. + @param foundLocations Vector of point where each point contains left-top corner point of detected object boundaries. + @param weights Vector that will contain confidence values for each detected object. + @param hitThreshold Threshold for the distance between features and SVM classifying plane. + Usually it is 0 and should be specified in the detector coefficients (as the last free coefficient). + But if the free coefficient is omitted (which is allowed), you can specify it manually here. + @param winStride Window stride. It must be a multiple of block stride. + @param padding Padding + @param searchLocations Vector of Point includes set of requested locations to be evaluated. + */ + CV_WRAP virtual void detect(InputArray img, CV_OUT std::vector& foundLocations, CV_OUT std::vector& weights, double hitThreshold = 0, Size winStride = Size(), Size padding = Size(), const std::vector& searchLocations = std::vector()) const; - //! without found weights output - virtual void detect(const Mat& img, CV_OUT std::vector& foundLocations, + + /** @brief Performs object detection without a multi-scale window. + @param img Matrix of the type CV_8U or CV_8UC3 containing an image where objects are detected. + @param foundLocations Vector of point where each point contains left-top corner point of detected object boundaries. + @param hitThreshold Threshold for the distance between features and SVM classifying plane. + Usually it is 0 and should be specified in the detector coefficients (as the last free coefficient). + But if the free coefficient is omitted (which is allowed), you can specify it manually here. + @param winStride Window stride. It must be a multiple of block stride. + @param padding Padding + @param searchLocations Vector of Point includes locations to search. + */ + virtual void detect(InputArray img, CV_OUT std::vector& foundLocations, double hitThreshold = 0, Size winStride = Size(), Size padding = Size(), const std::vector& searchLocations=std::vector()) const; - //! with result weights output + /** @brief Detects objects of different sizes in the input image. The detected objects are returned as a list + of rectangles. + @param img Matrix of the type CV_8U or CV_8UC3 containing an image where objects are detected. + @param foundLocations Vector of rectangles where each rectangle contains the detected object. + @param foundWeights Vector that will contain confidence values for each detected object. + @param hitThreshold Threshold for the distance between features and SVM classifying plane. + Usually it is 0 and should be specified in the detector coefficients (as the last free coefficient). + But if the free coefficient is omitted (which is allowed), you can specify it manually here. + @param winStride Window stride. It must be a multiple of block stride. + @param padding Padding + @param scale Coefficient of the detection window increase. + @param finalThreshold Final threshold + @param useMeanshiftGrouping indicates grouping algorithm + */ CV_WRAP virtual void detectMultiScale(InputArray img, CV_OUT std::vector& foundLocations, CV_OUT std::vector& foundWeights, double hitThreshold = 0, Size winStride = Size(), Size padding = Size(), double scale = 1.05, double finalThreshold = 2.0,bool useMeanshiftGrouping = false) const; - //! without found weights output + + /** @brief Detects objects of different sizes in the input image. The detected objects are returned as a list + of rectangles. + @param img Matrix of the type CV_8U or CV_8UC3 containing an image where objects are detected. + @param foundLocations Vector of rectangles where each rectangle contains the detected object. + @param hitThreshold Threshold for the distance between features and SVM classifying plane. + Usually it is 0 and should be specified in the detector coefficients (as the last free coefficient). + But if the free coefficient is omitted (which is allowed), you can specify it manually here. + @param winStride Window stride. It must be a multiple of block stride. + @param padding Padding + @param scale Coefficient of the detection window increase. + @param finalThreshold Final threshold + @param useMeanshiftGrouping indicates grouping algorithm + */ virtual void detectMultiScale(InputArray img, CV_OUT std::vector& foundLocations, double hitThreshold = 0, Size winStride = Size(), Size padding = Size(), double scale = 1.05, double finalThreshold = 2.0, bool useMeanshiftGrouping = false) const; - CV_WRAP virtual void computeGradient(const Mat& img, CV_OUT Mat& grad, CV_OUT Mat& angleOfs, + /** @brief Computes gradients and quantized gradient orientations. + @param img Matrix contains the image to be computed + @param grad Matrix of type CV_32FC2 contains computed gradients + @param angleOfs Matrix of type CV_8UC2 contains quantized gradient orientations + @param paddingTL Padding from top-left + @param paddingBR Padding from bottom-right + */ + CV_WRAP virtual void computeGradient(InputArray img, InputOutputArray grad, InputOutputArray angleOfs, Size paddingTL = Size(), Size paddingBR = Size()) const; + /** @brief Returns coefficients of the classifier trained for people detection (for 64x128 windows). + */ CV_WRAP static std::vector getDefaultPeopleDetector(); + + /**@example samples/tapi/hog.cpp + */ + /** @brief Returns coefficients of the classifier trained for people detection (for 48x96 windows). + */ CV_WRAP static std::vector getDaimlerPeopleDetector(); + //! Detection window size. Align to block size and block stride. Default value is Size(64,128). CV_PROP Size winSize; + + //! Block size in pixels. Align to cell size. Default value is Size(16,16). CV_PROP Size blockSize; + + //! Block stride. It must be a multiple of cell size. Default value is Size(8,8). CV_PROP Size blockStride; + + //! Cell size. Default value is Size(8,8). CV_PROP Size cellSize; + + //! Number of bins used in the calculation of histogram of gradients. Default value is 9. CV_PROP int nbins; + + //! not documented CV_PROP int derivAperture; + + //! Gaussian smoothing window parameter. CV_PROP double winSigma; - CV_PROP int histogramNormType; + + //! histogramNormType + CV_PROP HOGDescriptor::HistogramNormType histogramNormType; + + //! L2-Hys normalization method shrinkage. CV_PROP double L2HysThreshold; + + //! Flag to specify whether the gamma correction preprocessing is required or not. CV_PROP bool gammaCorrection; + + //! coefficients for the linear SVM classifier. CV_PROP std::vector svmDetector; + + //! coefficients for the linear SVM classifier used when OpenCL is enabled UMat oclSvmDetector; + + //! not documented float free_coef; + + //! Maximum number of detection window increases. Default value is 64 CV_PROP int nlevels; - CV_PROP bool signedGradient; + //! Indicates signed gradient will be used or not + CV_PROP bool signedGradient; - //! evaluate specified ROI and return confidence value for each location - virtual void detectROI(const cv::Mat& img, const std::vector &locations, + /** @brief evaluate specified ROI and return confidence value for each location + @param img Matrix of the type CV_8U or CV_8UC3 containing an image where objects are detected. + @param locations Vector of Point + @param foundLocations Vector of Point where each Point is detected object's top-left point. + @param confidences confidences + @param hitThreshold Threshold for the distance between features and SVM classifying plane. Usually + it is 0 and should be specified in the detector coefficients (as the last free coefficient). But if + the free coefficient is omitted (which is allowed), you can specify it manually here + @param winStride winStride + @param padding padding + */ + virtual void detectROI(InputArray img, const std::vector &locations, CV_OUT std::vector& foundLocations, CV_OUT std::vector& confidences, double hitThreshold = 0, cv::Size winStride = Size(), cv::Size padding = Size()) const; - //! evaluate specified ROI and return confidence value for each location in multiple scales - virtual void detectMultiScaleROI(const cv::Mat& img, - CV_OUT std::vector& foundLocations, - std::vector& locations, - double hitThreshold = 0, - int groupThreshold = 0) const; - - //! read/parse Dalal's alt model file - void readALTModel(String modelfile); + /** @brief evaluate specified ROI and return confidence value for each location in multiple scales + @param img Matrix of the type CV_8U or CV_8UC3 containing an image where objects are detected. + @param foundLocations Vector of rectangles where each rectangle contains the detected object. + @param locations Vector of DetectionROI + @param hitThreshold Threshold for the distance between features and SVM classifying plane. Usually it is 0 and should be specified + in the detector coefficients (as the last free coefficient). But if the free coefficient is omitted (which is allowed), you can specify it manually here. + @param groupThreshold Minimum possible number of rectangles minus 1. The threshold is used in a group of rectangles to retain it. + */ + virtual void detectMultiScaleROI(InputArray img, + CV_OUT std::vector& foundLocations, + std::vector& locations, + double hitThreshold = 0, + int groupThreshold = 0) const; + + /** @brief Groups the object candidate rectangles. + @param rectList Input/output vector of rectangles. Output vector includes retained and grouped rectangles. (The Python list is not modified in place.) + @param weights Input/output vector of weights of rectangles. Output vector includes weights of retained and grouped rectangles. (The Python list is not modified in place.) + @param groupThreshold Minimum possible number of rectangles minus 1. The threshold is used in a group of rectangles to retain it. + @param eps Relative difference between sides of the rectangles to merge them into a group. + */ void groupRectangles(std::vector& rectList, std::vector& weights, int groupThreshold, double eps) const; }; -//! @} objdetect +class CV_EXPORTS_W QRCodeDetector +{ +public: + CV_WRAP QRCodeDetector(); + ~QRCodeDetector(); + + /** @brief sets the epsilon used during the horizontal scan of QR code stop marker detection. + @param epsX Epsilon neighborhood, which allows you to determine the horizontal pattern + of the scheme 1:1:3:1:1 according to QR code standard. + */ + CV_WRAP void setEpsX(double epsX); + /** @brief sets the epsilon used during the vertical scan of QR code stop marker detection. + @param epsY Epsilon neighborhood, which allows you to determine the vertical pattern + of the scheme 1:1:3:1:1 according to QR code standard. + */ + CV_WRAP void setEpsY(double epsY); + + /** @brief Detects QR code in image and returns the quadrangle containing the code. + @param img grayscale or color (BGR) image containing (or not) QR code. + @param points Output vector of vertices of the minimum-area quadrangle containing the code. + */ + CV_WRAP bool detect(InputArray img, OutputArray points) const; + + /** @brief Decodes QR code in image once it's found by the detect() method. + + Returns UTF8-encoded output string or empty string if the code cannot be decoded. + @param img grayscale or color (BGR) image containing QR code. + @param points Quadrangle vertices found by detect() method (or some other algorithm). + @param straight_qrcode The optional output image containing rectified and binarized QR code + */ + CV_WRAP std::string decode(InputArray img, InputArray points, OutputArray straight_qrcode = noArray()); + + /** @brief Both detects and decodes QR code + @param img grayscale or color (BGR) image containing QR code. + @param points optional output array of vertices of the found QR code quadrangle. Will be empty if not found. + @param straight_qrcode The optional output image containing rectified and binarized QR code + */ + CV_WRAP std::string detectAndDecode(InputArray img, OutputArray points=noArray(), + OutputArray straight_qrcode = noArray()); + /** @brief Detects QR codes in image and returns the vector of the quadrangles containing the codes. + @param img grayscale or color (BGR) image containing (or not) QR codes. + @param points Output vector of vector of vertices of the minimum-area quadrangle containing the codes. + */ + CV_WRAP + bool detectMulti(InputArray img, OutputArray points) const; + + /** @brief Decodes QR codes in image once it's found by the detect() method. + @param img grayscale or color (BGR) image containing QR codes. + @param decoded_info UTF8-encoded output vector of string or empty vector of string if the codes cannot be decoded. + @param points vector of Quadrangle vertices found by detect() method (or some other algorithm). + @param straight_qrcode The optional output vector of images containing rectified and binarized QR codes + */ + CV_WRAP + bool decodeMulti( + InputArray img, InputArray points, + CV_OUT std::vector& decoded_info, + OutputArrayOfArrays straight_qrcode = noArray() + ) const; + + /** @brief Both detects and decodes QR codes + @param img grayscale or color (BGR) image containing QR codes. + @param decoded_info UTF8-encoded output vector of string or empty vector of string if the codes cannot be decoded. + @param points optional output vector of vertices of the found QR code quadrangles. Will be empty if not found. + @param straight_qrcode The optional output vector of images containing rectified and binarized QR codes + */ + CV_WRAP + bool detectAndDecodeMulti( + InputArray img, CV_OUT std::vector& decoded_info, + OutputArray points = noArray(), + OutputArrayOfArrays straight_qrcode = noArray() + ) const; + +protected: + struct Impl; + Ptr p; +}; + +//! @} objdetect } #include "opencv2/objdetect/detection_based_tracker.hpp" -#ifndef DISABLE_OPENCV_24_COMPATIBILITY -#include "opencv2/objdetect/objdetect_c.h" -#endif - #endif diff --git a/IPL/include/opencv/opencv2/objdetect/detection_based_tracker.hpp b/IPL/include/opencv/opencv2/objdetect/detection_based_tracker.hpp index 1f5f1d3..18cde13 100644 --- a/IPL/include/opencv/opencv2/objdetect/detection_based_tracker.hpp +++ b/IPL/include/opencv/opencv2/objdetect/detection_based_tracker.hpp @@ -41,12 +41,10 @@ // //M*/ -#ifndef __OPENCV_OBJDETECT_DBT_HPP__ -#define __OPENCV_OBJDETECT_DBT_HPP__ +#ifndef OPENCV_OBJDETECT_DBT_HPP +#define OPENCV_OBJDETECT_DBT_HPP -// After this condition removal update blacklist for bindings: modules/python/common.cmake -#if defined(__linux__) || defined(LINUX) || defined(__APPLE__) || defined(__ANDROID__) || \ - (defined(__cplusplus) && __cplusplus > 201103L) || (defined(_MSC_VER) && _MSC_VER >= 1700) +#include #include @@ -59,7 +57,7 @@ namespace cv class CV_EXPORTS DetectionBasedTracker { public: - struct Parameters + struct CV_EXPORTS Parameters { int maxTrackLifetime; int minDetectionPeriod; //the minimal time between run of the big object detector (on the whole frame) in ms (1000 mean 1 sec), default=0 @@ -220,6 +218,5 @@ class CV_EXPORTS DetectionBasedTracker //! @} objdetect } //end of cv namespace -#endif #endif diff --git a/IPL/include/opencv/opencv2/objdetect/objdetect_c.h b/IPL/include/opencv/opencv2/objdetect/objdetect_c.h deleted file mode 100644 index 632a438..0000000 --- a/IPL/include/opencv/opencv2/objdetect/objdetect_c.h +++ /dev/null @@ -1,165 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_OBJDETECT_C_H__ -#define __OPENCV_OBJDETECT_C_H__ - -#include "opencv2/core/core_c.h" - -#ifdef __cplusplus -#include -#include - -extern "C" { -#endif - -/** @addtogroup objdetect_c - @{ - */ - -/****************************************************************************************\ -* Haar-like Object Detection functions * -\****************************************************************************************/ - -#define CV_HAAR_MAGIC_VAL 0x42500000 -#define CV_TYPE_NAME_HAAR "opencv-haar-classifier" - -#define CV_IS_HAAR_CLASSIFIER( haar ) \ - ((haar) != NULL && \ - (((const CvHaarClassifierCascade*)(haar))->flags & CV_MAGIC_MASK)==CV_HAAR_MAGIC_VAL) - -#define CV_HAAR_FEATURE_MAX 3 - -typedef struct CvHaarFeature -{ - int tilted; - struct - { - CvRect r; - float weight; - } rect[CV_HAAR_FEATURE_MAX]; -} CvHaarFeature; - -typedef struct CvHaarClassifier -{ - int count; - CvHaarFeature* haar_feature; - float* threshold; - int* left; - int* right; - float* alpha; -} CvHaarClassifier; - -typedef struct CvHaarStageClassifier -{ - int count; - float threshold; - CvHaarClassifier* classifier; - - int next; - int child; - int parent; -} CvHaarStageClassifier; - -typedef struct CvHidHaarClassifierCascade CvHidHaarClassifierCascade; - -typedef struct CvHaarClassifierCascade -{ - int flags; - int count; - CvSize orig_window_size; - CvSize real_window_size; - double scale; - CvHaarStageClassifier* stage_classifier; - CvHidHaarClassifierCascade* hid_cascade; -} CvHaarClassifierCascade; - -typedef struct CvAvgComp -{ - CvRect rect; - int neighbors; -} CvAvgComp; - -/* Loads haar classifier cascade from a directory. - It is obsolete: convert your cascade to xml and use cvLoad instead */ -CVAPI(CvHaarClassifierCascade*) cvLoadHaarClassifierCascade( - const char* directory, CvSize orig_window_size); - -CVAPI(void) cvReleaseHaarClassifierCascade( CvHaarClassifierCascade** cascade ); - -#define CV_HAAR_DO_CANNY_PRUNING 1 -#define CV_HAAR_SCALE_IMAGE 2 -#define CV_HAAR_FIND_BIGGEST_OBJECT 4 -#define CV_HAAR_DO_ROUGH_SEARCH 8 - -CVAPI(CvSeq*) cvHaarDetectObjects( const CvArr* image, - CvHaarClassifierCascade* cascade, CvMemStorage* storage, - double scale_factor CV_DEFAULT(1.1), - int min_neighbors CV_DEFAULT(3), int flags CV_DEFAULT(0), - CvSize min_size CV_DEFAULT(cvSize(0,0)), CvSize max_size CV_DEFAULT(cvSize(0,0))); - -/* sets images for haar classifier cascade */ -CVAPI(void) cvSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* cascade, - const CvArr* sum, const CvArr* sqsum, - const CvArr* tilted_sum, double scale ); - -/* runs the cascade on the specified window */ -CVAPI(int) cvRunHaarClassifierCascade( const CvHaarClassifierCascade* cascade, - CvPoint pt, int start_stage CV_DEFAULT(0)); - -/** @} objdetect_c */ - -#ifdef __cplusplus -} - -CV_EXPORTS CvSeq* cvHaarDetectObjectsForROC( const CvArr* image, - CvHaarClassifierCascade* cascade, CvMemStorage* storage, - std::vector& rejectLevels, std::vector& levelWeightds, - double scale_factor = 1.1, - int min_neighbors = 3, int flags = 0, - CvSize min_size = cvSize(0, 0), CvSize max_size = cvSize(0, 0), - bool outputRejectLevels = false ); - -#endif - -#endif /* __OPENCV_OBJDETECT_C_H__ */ diff --git a/IPL/include/opencv/opencv2/opencv.hpp b/IPL/include/opencv/opencv2/opencv.hpp index 49b6a66..d17b94a 100644 --- a/IPL/include/opencv/opencv2/opencv.hpp +++ b/IPL/include/opencv/opencv2/opencv.hpp @@ -40,41 +40,56 @@ // //M*/ -#ifndef __OPENCV_ALL_HPP__ -#define __OPENCV_ALL_HPP__ +#ifndef OPENCV_ALL_HPP +#define OPENCV_ALL_HPP +// File that defines what modules where included during the build of OpenCV +// These are purely the defines of the correct HAVE_OPENCV_modulename values #include "opencv2/opencv_modules.hpp" +// Then the list of defines is checked to include the correct headers +// Core library is always included --> without no OpenCV functionality available #include "opencv2/core.hpp" -#ifdef HAVE_OPENCV_IMGPROC -#include "opencv2/imgproc.hpp" -#endif -#ifdef HAVE_OPENCV_PHOTO -#include "opencv2/photo.hpp" -#endif -#ifdef HAVE_OPENCV_VIDEO -#include "opencv2/video.hpp" + +// Then the optional modules are checked +#ifdef HAVE_OPENCV_CALIB3D +#include "opencv2/calib3d.hpp" #endif #ifdef HAVE_OPENCV_FEATURES2D #include "opencv2/features2d.hpp" #endif -#ifdef HAVE_OPENCV_OBJDETECT -#include "opencv2/objdetect.hpp" +#ifdef HAVE_OPENCV_DNN +#include "opencv2/dnn.hpp" #endif -#ifdef HAVE_OPENCV_CALIB3D -#include "opencv2/calib3d.hpp" +#ifdef HAVE_OPENCV_FLANN +#include "opencv2/flann.hpp" +#endif +#ifdef HAVE_OPENCV_HIGHGUI +#include "opencv2/highgui.hpp" #endif #ifdef HAVE_OPENCV_IMGCODECS #include "opencv2/imgcodecs.hpp" #endif -#ifdef HAVE_OPENCV_VIDEOIO -#include "opencv2/videoio.hpp" -#endif -#ifdef HAVE_OPENCV_HIGHGUI -#include "opencv2/highgui.hpp" +#ifdef HAVE_OPENCV_IMGPROC +#include "opencv2/imgproc.hpp" #endif #ifdef HAVE_OPENCV_ML #include "opencv2/ml.hpp" #endif +#ifdef HAVE_OPENCV_OBJDETECT +#include "opencv2/objdetect.hpp" +#endif +#ifdef HAVE_OPENCV_PHOTO +#include "opencv2/photo.hpp" +#endif +#ifdef HAVE_OPENCV_STITCHING +#include "opencv2/stitching.hpp" +#endif +#ifdef HAVE_OPENCV_VIDEO +#include "opencv2/video.hpp" +#endif +#ifdef HAVE_OPENCV_VIDEOIO +#include "opencv2/videoio.hpp" +#endif #endif diff --git a/IPL/include/opencv/opencv2/opencv_modules.hpp b/IPL/include/opencv/opencv2/opencv_modules.hpp index 9b85f33..cc3f93e 100644 --- a/IPL/include/opencv/opencv2/opencv_modules.hpp +++ b/IPL/include/opencv/opencv2/opencv_modules.hpp @@ -1,50 +1,63 @@ -/* - * ** File generated automatically, do not modify ** - * - * This file defines the list of modules available in current build configuration - * - * -*/ - -#define HAVE_OPENCV_ARUCO -#define HAVE_OPENCV_BGSEGM -#define HAVE_OPENCV_BIOINSPIRED -#define HAVE_OPENCV_CALIB3D -#define HAVE_OPENCV_CCALIB -#define HAVE_OPENCV_CORE -#define HAVE_OPENCV_DATASETS -#define HAVE_OPENCV_DNN -#define HAVE_OPENCV_DPM -#define HAVE_OPENCV_FACE -#define HAVE_OPENCV_FEATURES2D -#define HAVE_OPENCV_FLANN -#define HAVE_OPENCV_FUZZY -#define HAVE_OPENCV_HIGHGUI -#define HAVE_OPENCV_IMGCODECS -#define HAVE_OPENCV_IMGPROC -#define HAVE_OPENCV_LINE_DESCRIPTOR -#define HAVE_OPENCV_ML -#define HAVE_OPENCV_OBJDETECT -#define HAVE_OPENCV_OPTFLOW -#define HAVE_OPENCV_PHOTO -#define HAVE_OPENCV_PLOT -#define HAVE_OPENCV_REG -#define HAVE_OPENCV_RGBD -#define HAVE_OPENCV_SALIENCY -#define HAVE_OPENCV_SHAPE -#define HAVE_OPENCV_STEREO -#define HAVE_OPENCV_STITCHING -#define HAVE_OPENCV_STRUCTURED_LIGHT -#define HAVE_OPENCV_SUPERRES -#define HAVE_OPENCV_SURFACE_MATCHING -#define HAVE_OPENCV_TEXT -#define HAVE_OPENCV_TRACKING -#define HAVE_OPENCV_VIDEO -#define HAVE_OPENCV_VIDEOIO -#define HAVE_OPENCV_VIDEOSTAB -#define HAVE_OPENCV_XFEATURES2D -#define HAVE_OPENCV_XIMGPROC -#define HAVE_OPENCV_XOBJDETECT -#define HAVE_OPENCV_XPHOTO - - +/* + * ** File generated automatically, do not modify ** + * + * This file defines the list of modules available in current build configuration + * + * +*/ + +// This definition means that OpenCV is built with enabled non-free code. +// For example, patented algorithms for non-profit/non-commercial use only. +/* #undef OPENCV_ENABLE_NONFREE */ + +#define HAVE_OPENCV_ARUCO +#define HAVE_OPENCV_BGSEGM +#define HAVE_OPENCV_BIOINSPIRED +#define HAVE_OPENCV_CALIB3D +#define HAVE_OPENCV_CCALIB +#define HAVE_OPENCV_CORE +#define HAVE_OPENCV_DATASETS +#define HAVE_OPENCV_DNN +#define HAVE_OPENCV_DNN_OBJDETECT +#define HAVE_OPENCV_DNN_SUPERRES +#define HAVE_OPENCV_DPM +#define HAVE_OPENCV_FACE +#define HAVE_OPENCV_FEATURES2D +#define HAVE_OPENCV_FLANN +#define HAVE_OPENCV_FUZZY +#define HAVE_OPENCV_GAPI +#define HAVE_OPENCV_HFS +#define HAVE_OPENCV_HIGHGUI +#define HAVE_OPENCV_IMG_HASH +#define HAVE_OPENCV_IMGCODECS +#define HAVE_OPENCV_IMGPROC +#define HAVE_OPENCV_INTENSITY_TRANSFORM +#define HAVE_OPENCV_LINE_DESCRIPTOR +#define HAVE_OPENCV_ML +#define HAVE_OPENCV_OBJDETECT +#define HAVE_OPENCV_OPTFLOW +#define HAVE_OPENCV_PHASE_UNWRAPPING +#define HAVE_OPENCV_PHOTO +#define HAVE_OPENCV_PLOT +#define HAVE_OPENCV_QUALITY +#define HAVE_OPENCV_RAPID +#define HAVE_OPENCV_REG +#define HAVE_OPENCV_RGBD +#define HAVE_OPENCV_SALIENCY +#define HAVE_OPENCV_SHAPE +#define HAVE_OPENCV_STEREO +#define HAVE_OPENCV_STITCHING +#define HAVE_OPENCV_STRUCTURED_LIGHT +#define HAVE_OPENCV_SUPERRES +#define HAVE_OPENCV_SURFACE_MATCHING +#define HAVE_OPENCV_TEXT +#define HAVE_OPENCV_TRACKING +#define HAVE_OPENCV_VIDEO +#define HAVE_OPENCV_VIDEOIO +#define HAVE_OPENCV_VIDEOSTAB +#define HAVE_OPENCV_XFEATURES2D +#define HAVE_OPENCV_XIMGPROC +#define HAVE_OPENCV_XOBJDETECT +#define HAVE_OPENCV_XPHOTO + + diff --git a/IPL/include/opencv/opencv2/optflow.hpp b/IPL/include/opencv/opencv2/optflow.hpp index 667adcb..093b5fe 100644 --- a/IPL/include/opencv/opencv2/optflow.hpp +++ b/IPL/include/opencv/opencv2/optflow.hpp @@ -66,11 +66,14 @@ Functions reading and writing .flo files in "Middlebury" format, see: createOptFlow_Farneback(); //! Additional interface to the SparseToDenseFlow algorithm - calcOpticalFlowSparseToDense() CV_EXPORTS_W Ptr createOptFlow_SparseToDense(); +/** @brief "Dual TV L1" Optical Flow Algorithm. + +The class implements the "Dual TV L1" optical flow algorithm described in @cite Zach2007 and +@cite Javier2012 . +Here are important members of the class that control the algorithm, which you can set after +constructing the class instance: + +- member double tau + Time step of the numerical scheme. + +- member double lambda + Weight parameter for the data term, attachment parameter. This is the most relevant + parameter, which determines the smoothness of the output. The smaller this parameter is, + the smoother the solutions we obtain. It depends on the range of motions of the images, so + its value should be adapted to each image sequence. + +- member double theta + Weight parameter for (u - v)\^2, tightness parameter. It serves as a link between the + attachment and the regularization terms. In theory, it should have a small value in order + to maintain both parts in correspondence. The method is stable for a large range of values + of this parameter. + +- member int nscales + Number of scales used to create the pyramid of images. + +- member int warps + Number of warpings per scale. Represents the number of times that I1(x+u0) and grad( + I1(x+u0) ) are computed per scale. This is a parameter that assures the stability of the + method. It also affects the running time, so it is a compromise between speed and + accuracy. + +- member double epsilon + Stopping criterion threshold used in the numerical scheme, which is a trade-off between + precision and running time. A small value will yield more accurate solutions at the + expense of a slower convergence. + +- member int iterations + Stopping criterion iterations number used in the numerical scheme. + +C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow". +Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation". +*/ +class CV_EXPORTS_W DualTVL1OpticalFlow : public DenseOpticalFlow +{ +public: + //! @brief Time step of the numerical scheme + /** @see setTau */ + CV_WRAP virtual double getTau() const = 0; + /** @copybrief getTau @see getTau */ + CV_WRAP virtual void setTau(double val) = 0; + //! @brief Weight parameter for the data term, attachment parameter + /** @see setLambda */ + CV_WRAP virtual double getLambda() const = 0; + /** @copybrief getLambda @see getLambda */ + CV_WRAP virtual void setLambda(double val) = 0; + //! @brief Weight parameter for (u - v)^2, tightness parameter + /** @see setTheta */ + CV_WRAP virtual double getTheta() const = 0; + /** @copybrief getTheta @see getTheta */ + CV_WRAP virtual void setTheta(double val) = 0; + //! @brief coefficient for additional illumination variation term + /** @see setGamma */ + CV_WRAP virtual double getGamma() const = 0; + /** @copybrief getGamma @see getGamma */ + CV_WRAP virtual void setGamma(double val) = 0; + //! @brief Number of scales used to create the pyramid of images + /** @see setScalesNumber */ + CV_WRAP virtual int getScalesNumber() const = 0; + /** @copybrief getScalesNumber @see getScalesNumber */ + CV_WRAP virtual void setScalesNumber(int val) = 0; + //! @brief Number of warpings per scale + /** @see setWarpingsNumber */ + CV_WRAP virtual int getWarpingsNumber() const = 0; + /** @copybrief getWarpingsNumber @see getWarpingsNumber */ + CV_WRAP virtual void setWarpingsNumber(int val) = 0; + //! @brief Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time + /** @see setEpsilon */ + CV_WRAP virtual double getEpsilon() const = 0; + /** @copybrief getEpsilon @see getEpsilon */ + CV_WRAP virtual void setEpsilon(double val) = 0; + //! @brief Inner iterations (between outlier filtering) used in the numerical scheme + /** @see setInnerIterations */ + CV_WRAP virtual int getInnerIterations() const = 0; + /** @copybrief getInnerIterations @see getInnerIterations */ + CV_WRAP virtual void setInnerIterations(int val) = 0; + //! @brief Outer iterations (number of inner loops) used in the numerical scheme + /** @see setOuterIterations */ + CV_WRAP virtual int getOuterIterations() const = 0; + /** @copybrief getOuterIterations @see getOuterIterations */ + CV_WRAP virtual void setOuterIterations(int val) = 0; + //! @brief Use initial flow + /** @see setUseInitialFlow */ + CV_WRAP virtual bool getUseInitialFlow() const = 0; + /** @copybrief getUseInitialFlow @see getUseInitialFlow */ + CV_WRAP virtual void setUseInitialFlow(bool val) = 0; + //! @brief Step between scales (<1) + /** @see setScaleStep */ + CV_WRAP virtual double getScaleStep() const = 0; + /** @copybrief getScaleStep @see getScaleStep */ + CV_WRAP virtual void setScaleStep(double val) = 0; + //! @brief Median filter kernel size (1 = no filter) (3 or 5) + /** @see setMedianFiltering */ + CV_WRAP virtual int getMedianFiltering() const = 0; + /** @copybrief getMedianFiltering @see getMedianFiltering */ + CV_WRAP virtual void setMedianFiltering(int val) = 0; + + /** @brief Creates instance of cv::DualTVL1OpticalFlow*/ + CV_WRAP static Ptr create( + double tau = 0.25, + double lambda = 0.15, + double theta = 0.3, + int nscales = 5, + int warps = 5, + double epsilon = 0.01, + int innnerIterations = 30, + int outerIterations = 10, + double scaleStep = 0.8, + double gamma = 0.0, + int medianFiltering = 5, + bool useInitialFlow = false); +}; + +/** @brief Creates instance of cv::DenseOpticalFlow +*/ +CV_EXPORTS_W Ptr createOptFlow_DualTVL1(); + //! @} } //optflow diff --git a/IPL/include/opencv/opencv2/optflow/pcaflow.hpp b/IPL/include/opencv/opencv2/optflow/pcaflow.hpp new file mode 100644 index 0000000..78946f6 --- /dev/null +++ b/IPL/include/opencv/opencv2/optflow/pcaflow.hpp @@ -0,0 +1,149 @@ +/* +By downloading, copying, installing or using the software you agree to this +license. If you do not agree to this license, do not download, install, +copy or use the software. + + + License Agreement + For Open Source Computer Vision Library + (3-clause BSD License) + +Copyright (C) 2016, OpenCV Foundation, all rights reserved. +Third party copyrights are property of their respective owners. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the names of the copyright holders nor the names of the contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +This software is provided by the copyright holders and contributors "as is" and +any express or implied warranties, including, but not limited to, the implied +warranties of merchantability and fitness for a particular purpose are +disclaimed. In no event shall copyright holders or contributors be liable for +any direct, indirect, incidental, special, exemplary, or consequential damages +(including, but not limited to, procurement of substitute goods or services; +loss of use, data, or profits; or business interruption) however caused +and on any theory of liability, whether in contract, strict liability, +or tort (including negligence or otherwise) arising in any way out of +the use of this software, even if advised of the possibility of such damage. +*/ + +/** + * @file pcaflow.hpp + * @author Vladislav Samsonov + * @brief Implementation of the PCAFlow algorithm from the following paper: + * http://files.is.tue.mpg.de/black/papers/cvpr2015_pcaflow.pdf + * + * @cite Wulff:CVPR:2015 + * + * There are some key differences which distinguish this algorithm from the original PCAFlow (see paper): + * - Discrete Cosine Transform basis is used instead of basis extracted with PCA. + * Reasoning: DCT basis has comparable performance and it doesn't require additional storage space. + * Also, this decision helps to avoid overloading the algorithm with a lot of external input. + * - Usage of built-in OpenCV feature tracking instead of libviso. +*/ + +#ifndef __OPENCV_OPTFLOW_PCAFLOW_HPP__ +#define __OPENCV_OPTFLOW_PCAFLOW_HPP__ + +#include "opencv2/core.hpp" +#include "opencv2/video.hpp" + +namespace cv +{ +namespace optflow +{ + +//! @addtogroup optflow +//! @{ + +/** @brief + * This class can be used for imposing a learned prior on the resulting optical flow. + * Solution will be regularized according to this prior. + * You need to generate appropriate prior file with "learn_prior.py" script beforehand. + */ +class CV_EXPORTS_W PCAPrior +{ +private: + Mat L1; + Mat L2; + Mat c1; + Mat c2; + +public: + PCAPrior( const char *pathToPrior ); + + int getPadding() const { return L1.size().height; } + + int getBasisSize() const { return L1.size().width; } + + void fillConstraints( float *A1, float *A2, float *b1, float *b2 ) const; +}; + +/** @brief PCAFlow algorithm. + */ +class CV_EXPORTS_W OpticalFlowPCAFlow : public DenseOpticalFlow +{ +protected: + const Ptr prior; + const Size basisSize; + const float sparseRate; // (0 .. 0.1) + const float retainedCornersFraction; // [0 .. 1] + const float occlusionsThreshold; + const float dampingFactor; + const float claheClip; + bool useOpenCL; + +public: + /** @brief Creates an instance of PCAFlow algorithm. + * @param _prior Learned prior or no prior (default). @see cv::optflow::PCAPrior + * @param _basisSize Number of basis vectors. + * @param _sparseRate Controls density of sparse matches. + * @param _retainedCornersFraction Retained corners fraction. + * @param _occlusionsThreshold Occlusion threshold. + * @param _dampingFactor Regularization term for solving least-squares. It is not related to the prior regularization. + * @param _claheClip Clip parameter for CLAHE. + */ + OpticalFlowPCAFlow( Ptr _prior = Ptr(), const Size _basisSize = Size( 18, 14 ), + float _sparseRate = 0.024, float _retainedCornersFraction = 0.2, + float _occlusionsThreshold = 0.0003, float _dampingFactor = 0.00002, float _claheClip = 14 ); + + void calc( InputArray I0, InputArray I1, InputOutputArray flow ) CV_OVERRIDE; + void collectGarbage() CV_OVERRIDE; + +private: + void findSparseFeatures( UMat &from, UMat &to, std::vector &features, + std::vector &predictedFeatures ) const; + + void removeOcclusions( UMat &from, UMat &to, std::vector &features, + std::vector &predictedFeatures ) const; + + void getSystem( OutputArray AOut, OutputArray b1Out, OutputArray b2Out, const std::vector &features, + const std::vector &predictedFeatures, const Size size ); + + void getSystem( OutputArray A1Out, OutputArray A2Out, OutputArray b1Out, OutputArray b2Out, + const std::vector &features, const std::vector &predictedFeatures, + const Size size ); + + OpticalFlowPCAFlow& operator=( const OpticalFlowPCAFlow& ); // make it non-assignable +}; + +/** @brief Creates an instance of PCAFlow +*/ +CV_EXPORTS_W Ptr createOptFlow_PCAFlow(); + +//! @} + +} +} + +#endif diff --git a/IPL/include/opencv/opencv2/optflow/rlofflow.hpp b/IPL/include/opencv/opencv2/optflow/rlofflow.hpp new file mode 100644 index 0000000..58daf53 --- /dev/null +++ b/IPL/include/opencv/opencv2/optflow/rlofflow.hpp @@ -0,0 +1,551 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +#ifndef __OPENCV_OPTFLOW_RLOFFLOW_HPP__ +#define __OPENCV_OPTFLOW_RLOFFLOW_HPP__ + +#include "opencv2/core.hpp" +#include "opencv2/video.hpp" + +namespace cv +{ +namespace optflow +{ +//! @addtogroup optflow +//! @{ + +enum SupportRegionType { + SR_FIXED = 0, /**< Apply a constant support region */ + SR_CROSS = 1 /**< Apply a adaptive support region obtained by cross-based segmentation + * as described in @cite Senst2014 + */ +}; +enum SolverType { + ST_STANDART = 0, /**< Apply standard iterative refinement */ + ST_BILINEAR = 1 /**< Apply optimized iterative refinement based bilinear equation solutions + * as described in @cite Senst2013 + */ +}; + +enum InterpolationType +{ + INTERP_GEO = 0, /**< Fast geodesic interpolation, see @cite Geistert2016 */ + INTERP_EPIC = 1, /**< Edge-preserving interpolation using ximgproc::EdgeAwareInterpolator, see @cite Revaud2015,Geistert2016. */ + INTERP_RIC = 2, /**< SLIC based robust interpolation using ximgproc::RICInterpolator, see @cite Hu2017. */ +}; + +/** @brief This is used store and set up the parameters of the robust local optical flow (RLOF) algoritm. + * + * The RLOF is a fast local optical flow approach described in @cite Senst2012 @cite Senst2013 @cite Senst2014 + * and @cite Senst2016 similar to the pyramidal iterative Lucas-Kanade method as + * proposed by @cite Bouguet00. More details and experiments can be found in the following thesis @cite Senst2019. + * The implementation is derived from optflow::calcOpticalFlowPyrLK(). + * This RLOF implementation can be seen as an improved pyramidal iterative Lucas-Kanade and includes + * a set of improving modules. The main improvements in respect to the pyramidal iterative Lucas-Kanade + * are: + * - A more robust redecending M-estimator framework (see @cite Senst2012) to improve the accuracy at + * motion boundaries and appearing and disappearing pixels. + * - an adaptive support region strategies to improve the accuracy at motion boundaries to reduce the + * corona effect, i.e oversmoothing of the PLK at motion/object boundaries. The cross-based segementation + * strategy (SR_CROSS) proposed in @cite Senst2014 uses a simple segmenation approach to obtain the optimal + * shape of the support region. + * - To deal with illumination changes (outdoor sequences and shadow) the intensity constancy assumption + * based optical flow equation has been adopt with the Gennert and Negahdaripour illumination model + * (see @cite Senst2016). This model can be switched on/off with the useIlluminationModel variable. + * - By using a global motion prior initialization (see @cite Senst2016) of the iterative refinement + * the accuracy could be significantly improved for large displacements. This initialization can be + * switched on and of with useGlobalMotionPrior variable. + * + * The RLOF can be computed with the SparseOpticalFlow class or function interface to track a set of features + * or with the DenseOpticalFlow class or function interface to compute dense optical flow. + * + * @see optflow::DenseRLOFOpticalFlow, optflow::calcOpticalFlowDenseRLOF(), optflow::SparseRLOFOpticalFlow, optflow::calcOpticalFlowSparseRLOF() + */ +class CV_EXPORTS_W RLOFOpticalFlowParameter{ +public: + RLOFOpticalFlowParameter() + :solverType(ST_BILINEAR) + ,supportRegionType(SR_CROSS) + ,normSigma0(std::numeric_limits::max()) + ,normSigma1(std::numeric_limits::max()) + ,smallWinSize(9) + ,largeWinSize(21) + ,crossSegmentationThreshold(25) + ,maxLevel(4) + ,useInitialFlow(false) + ,useIlluminationModel(true) + ,useGlobalMotionPrior(true) + ,maxIteration(30) + ,minEigenValue(0.0001f) + ,globalMotionRansacThreshold(10) + {} + + SolverType solverType; + /**< Variable specifies the iterative refinement strategy. Please consider citing @cite Senst2013 when + * using ST_BILINEAR. + */ + + SupportRegionType supportRegionType; + /**< Variable specifies the support region shape extraction or shrinking strategy. + */ + + float normSigma0; + /**< &sigma parameter of the shrinked Hampel norm introduced in @cite Senst2012. If + * &sigma = std::numeric_limist::max() the least-square estimator will be used + * instead of the M-estimator. Althoug M-estimator is more robust against outlier in the support + * region the least-square can be fast in computation. + */ + float normSigma1; + /**< &sigma parameter of the shrinked Hampel norm introduced in @cite Senst2012. If + * &sigma = std::numeric_limist::max() the least-square estimator will be used + * instead of the M-estimator. Althoug M-estimator is more robust against outlier in the support + * region the least-square can be fast in computation. + */ + int smallWinSize; + /**< Minimal window size of the support region. This parameter is only used if supportRegionType is SR_CROSS. + */ + int largeWinSize; + /**< Maximal window size of the support region. If supportRegionType is SR_FIXED this gives the exact support + * region size. The speed of the RLOF is related to the applied win sizes. The smaller the window size the lower is the runtime, + * but the more sensitive to noise is the method. + */ + int crossSegmentationThreshold; + /**< Color similarity threshold used by cross-based segmentation following @cite Senst2014 . + * (Only used if supportRegionType is SR_CROSS). With the cross-bassed segmentation + * motion boundaries can be computed more accurately. + */ + int maxLevel; + /**< Maximal number of pyramid level used. The large this value is the more likely it is + * to obtain accurate solutions for long-range motions. The runtime is linear related to + * this parameter. + */ + bool useInitialFlow; + /**< Use next point list as initial values. A good intialization can imporve the algortihm + * accuracy and reduce the runtime by a faster convergence of the iteration refinement. + */ + bool useIlluminationModel; + /**< Use the Gennert and Negahdaripour illumination model instead of the intensity brigthness + * constraint. (proposed in @cite Senst2016 ) This model is defined as follow: + * \f[ I(\mathbf{x},t) + m \cdot I(\mathbf{x},t) + c = I(\mathbf{x},t+1) \f] + * and contains with m and c a multiplicative and additive term which makes the estimate + * more robust against illumination changes. The computational complexity is increased by + * enabling the illumination model. + */ + bool useGlobalMotionPrior; + /**< Use global motion prior initialisation has been introduced in @cite Senst2016 . It + * allows to be more accurate for long-range motion. The computational complexity is + * slightly increased by enabling the global motion prior initialisation. + */ + int maxIteration; + /**< Number of maximal iterations used for the iterative refinement. Lower values can + * reduce the runtime but also the accuracy. + */ + float minEigenValue; + /**< Threshold for the minimal eigenvalue of the gradient matrix defines when to abort the + * iterative refinement. + */ + float globalMotionRansacThreshold; + /**< To apply the global motion prior motion vectors will be computed on a regulary sampled which + * are the basis for Homography estimation using RANSAC. The reprojection threshold is based on + * n-th percentil (given by this value [0 ... 100]) of the motion vectors magnitude. + * See @cite Senst2016 for more details. + */ + + //! @brief Enable M-estimator or disable and use least-square estimator. + /** Enables M-estimator by setting sigma parameters to (3.2, 7.0). Disabling M-estimator can reduce + * runtime, while enabling can improve the accuracy. + * @param val If true M-estimator is used. If false least-square estimator is used. + * @see setNormSigma0, setNormSigma1 + */ + CV_WRAP void setUseMEstimator(bool val); + + CV_WRAP void setSolverType(SolverType val); + CV_WRAP SolverType getSolverType() const; + + CV_WRAP void setSupportRegionType(SupportRegionType val); + CV_WRAP SupportRegionType getSupportRegionType() const; + + CV_WRAP void setNormSigma0(float val); + CV_WRAP float getNormSigma0() const; + + CV_WRAP void setNormSigma1(float val); + CV_WRAP float getNormSigma1() const; + + CV_WRAP void setSmallWinSize(int val); + CV_WRAP int getSmallWinSize() const; + + CV_WRAP void setLargeWinSize(int val); + CV_WRAP int getLargeWinSize() const; + + CV_WRAP void setCrossSegmentationThreshold(int val); + CV_WRAP int getCrossSegmentationThreshold() const; + + CV_WRAP void setMaxLevel(int val); + CV_WRAP int getMaxLevel() const; + + CV_WRAP void setUseInitialFlow(bool val); + CV_WRAP bool getUseInitialFlow() const; + + CV_WRAP void setUseIlluminationModel(bool val); + CV_WRAP bool getUseIlluminationModel() const; + + CV_WRAP void setUseGlobalMotionPrior(bool val); + CV_WRAP bool getUseGlobalMotionPrior() const; + + CV_WRAP void setMaxIteration(int val); + CV_WRAP int getMaxIteration() const; + + CV_WRAP void setMinEigenValue(float val); + CV_WRAP float getMinEigenValue() const; + + CV_WRAP void setGlobalMotionRansacThreshold(float val); + CV_WRAP float getGlobalMotionRansacThreshold() const; + + //! @brief Creates instance of optflow::RLOFOpticalFlowParameter + CV_WRAP static Ptr create(); +}; + +/** @brief Fast dense optical flow computation based on robust local optical flow (RLOF) algorithms and sparse-to-dense interpolation + * scheme. + * + * The RLOF is a fast local optical flow approach described in @cite Senst2012 @cite Senst2013 @cite Senst2014 + * and @cite Senst2016 similar to the pyramidal iterative Lucas-Kanade method as + * proposed by @cite Bouguet00. More details and experiments can be found in the following thesis @cite Senst2019. + * The implementation is derived from optflow::calcOpticalFlowPyrLK(). + * + * The sparse-to-dense interpolation scheme allows for fast computation of dense optical flow using RLOF (see @cite Geistert2016). + * For this scheme the following steps are applied: + * -# motion vector seeded at a regular sampled grid are computed. The sparsity of this grid can be configured with setGridStep + * -# (optinally) errornous motion vectors are filter based on the forward backward confidence. The threshold can be configured + * with setForwardBackward. The filter is only applied if the threshold >0 but than the runtime is doubled due to the estimation + * of the backward flow. + * -# Vector field interpolation is applied to the motion vector set to obtain a dense vector field. + * + * For the RLOF configuration see optflow::RLOFOpticalFlowParameter for further details. + * Parameters have been described in @cite Senst2012 @cite Senst2013 @cite Senst2014 and @cite Senst2016. + * + * @note If the grid size is set to (1,1) and the forward backward threshold <= 0 than pixelwise dense optical flow field is + * computed by RLOF without using interpolation. + * + * @see optflow::calcOpticalFlowDenseRLOF(), optflow::RLOFOpticalFlowParameter +*/ +class CV_EXPORTS_W DenseRLOFOpticalFlow : public DenseOpticalFlow +{ +public: + //! @brief Configuration of the RLOF alogrithm. + /** + @see optflow::RLOFOpticalFlowParameter, getRLOFOpticalFlowParameter + */ + CV_WRAP virtual void setRLOFOpticalFlowParameter(Ptr val) = 0; + /** @copybrief setRLOFOpticalFlowParameter + @see optflow::RLOFOpticalFlowParameter, setRLOFOpticalFlowParameter + */ + CV_WRAP virtual Ptr getRLOFOpticalFlowParameter() const = 0; + //! @brief Threshold for the forward backward confidence check + /**For each grid point \f$ \mathbf{x} \f$ a motion vector \f$ d_{I0,I1}(\mathbf{x}) \f$ is computed. + * If the forward backward error \f[ EP_{FB} = || d_{I0,I1} + d_{I1,I0} || \f] + * is larger than threshold given by this function then the motion vector will not be used by the following + * vector field interpolation. \f$ d_{I1,I0} \f$ denotes the backward flow. Note, the forward backward test + * will only be applied if the threshold > 0. This may results into a doubled runtime for the motion estimation. + * @see getForwardBackward, setGridStep + */ + CV_WRAP virtual void setForwardBackward(float val) = 0; + /** @copybrief setForwardBackward + @see setForwardBackward + */ + CV_WRAP virtual float getForwardBackward() const = 0; + //! @brief Size of the grid to spawn the motion vectors. + /** For each grid point a motion vector is computed. Some motion vectors will be removed due to the forwatd backward + * threshold (if set >0). The rest will be the base of the vector field interpolation. + * @see getForwardBackward, setGridStep + */ + CV_WRAP virtual Size getGridStep() const = 0; + /** @copybrief getGridStep + * @see getGridStep + */ + CV_WRAP virtual void setGridStep(Size val) = 0; + + //! @brief Interpolation used to compute the dense optical flow. + /** Two interpolation algorithms are supported + * - **INTERP_GEO** applies the fast geodesic interpolation, see @cite Geistert2016. + * - **INTERP_EPIC_RESIDUAL** applies the edge-preserving interpolation, see @cite Revaud2015,Geistert2016. + * @see ximgproc::EdgeAwareInterpolator, getInterpolation + */ + CV_WRAP virtual void setInterpolation(InterpolationType val) = 0; + /** @copybrief setInterpolation + * @see ximgproc::EdgeAwareInterpolator, setInterpolation + */ + CV_WRAP virtual InterpolationType getInterpolation() const = 0; + //! @brief see ximgproc::EdgeAwareInterpolator() K value. + /** K is a number of nearest-neighbor matches considered, when fitting a locally affine + * model. Usually it should be around 128. However, lower values would make the interpolation noticeably faster. + * @see ximgproc::EdgeAwareInterpolator, setEPICK + */ + CV_WRAP virtual int getEPICK() const = 0; + /** @copybrief getEPICK + * @see ximgproc::EdgeAwareInterpolator, getEPICK + */ + CV_WRAP virtual void setEPICK(int val) = 0; + //! @brief see ximgproc::EdgeAwareInterpolator() sigma value. + /** Sigma is a parameter defining how fast the weights decrease in the locally-weighted affine + * fitting. Higher values can help preserve fine details, lower values can help to get rid of noise in the + * output flow. + * @see ximgproc::EdgeAwareInterpolator, setEPICSigma + */ + CV_WRAP virtual float getEPICSigma() const = 0; + /** @copybrief getEPICSigma + * @see ximgproc::EdgeAwareInterpolator, getEPICSigma + */ + CV_WRAP virtual void setEPICSigma(float val) = 0; + //! @brief see ximgproc::EdgeAwareInterpolator() lambda value. + /** Lambda is a parameter defining the weight of the edge-aware term in geodesic distance, + * should be in the range of 0 to 1000. + * @see ximgproc::EdgeAwareInterpolator, setEPICSigma + */ + CV_WRAP virtual float getEPICLambda() const = 0; + /** @copybrief getEPICLambda + * @see ximgproc::EdgeAwareInterpolator, getEPICLambda + */ + CV_WRAP virtual void setEPICLambda(float val) = 0; + //! @brief see ximgproc::EdgeAwareInterpolator(). + /** Sets the respective fastGlobalSmootherFilter() parameter. + * @see ximgproc::EdgeAwareInterpolator, setFgsLambda + */ + CV_WRAP virtual float getFgsLambda() const = 0; + /** @copybrief getFgsLambda + * @see ximgproc::EdgeAwareInterpolator, ximgproc::fastGlobalSmootherFilter, getFgsLambda + */ + CV_WRAP virtual void setFgsLambda(float val) = 0; + //! @brief see ximgproc::EdgeAwareInterpolator(). + /** Sets the respective fastGlobalSmootherFilter() parameter. + * @see ximgproc::EdgeAwareInterpolator, ximgproc::fastGlobalSmootherFilter, setFgsSigma + */ + CV_WRAP virtual float getFgsSigma() const = 0; + /** @copybrief getFgsSigma + * @see ximgproc::EdgeAwareInterpolator, ximgproc::fastGlobalSmootherFilter, getFgsSigma + */ + CV_WRAP virtual void setFgsSigma(float val) = 0; + //! @brief enables ximgproc::fastGlobalSmootherFilter + /** + * @see getUsePostProc + */ + CV_WRAP virtual void setUsePostProc(bool val) = 0; + /** @copybrief setUsePostProc + * @see ximgproc::fastGlobalSmootherFilter, setUsePostProc + */ + CV_WRAP virtual bool getUsePostProc() const = 0; + //! @brief enables VariationalRefinement + /** + * @see getUseVariationalRefinement + */ + CV_WRAP virtual void setUseVariationalRefinement(bool val) = 0; + /** @copybrief setUseVariationalRefinement + * @see ximgproc::fastGlobalSmootherFilter, setUsePostProc + */ + CV_WRAP virtual bool getUseVariationalRefinement() const = 0; + //! @brief Parameter to tune the approximate size of the superpixel used for oversegmentation. + /** + * @see cv::ximgproc::createSuperpixelSLIC, cv::ximgproc::RICInterpolator + */ + CV_WRAP virtual void setRICSPSize(int val) = 0; + /** @copybrief setRICSPSize + * @see setRICSPSize + */ + CV_WRAP virtual int getRICSPSize() const = 0; + /** @brief Parameter to choose superpixel algorithm variant to use: + * - cv::ximgproc::SLICType SLIC segments image using a desired region_size (value: 100) + * - cv::ximgproc::SLICType SLICO will optimize using adaptive compactness factor (value: 101) + * - cv::ximgproc::SLICType MSLIC will optimize using manifold methods resulting in more content-sensitive superpixels (value: 102). + * @see cv::ximgproc::createSuperpixelSLIC, cv::ximgproc::RICInterpolator + */ + CV_WRAP virtual void setRICSLICType(int val) = 0; + /** @copybrief setRICSLICType + * @see setRICSLICType + */ + CV_WRAP virtual int getRICSLICType() const = 0; + //! @brief Creates instance of optflow::DenseRLOFOpticalFlow + /** + * @param rlofParam see optflow::RLOFOpticalFlowParameter + * @param forwardBackwardThreshold see setForwardBackward + * @param gridStep see setGridStep + * @param interp_type see setInterpolation + * @param epicK see setEPICK + * @param epicSigma see setEPICSigma + * @param epicLambda see setEPICLambda + * @param ricSPSize see setRICSPSize + * @param ricSLICType see setRICSLICType + * @param use_post_proc see setUsePostProc + * @param fgsLambda see setFgsLambda + * @param fgsSigma see setFgsSigma + * @param use_variational_refinement see setUseVariationalRefinement + */ + CV_WRAP static Ptr create( + Ptr rlofParam = Ptr(), + float forwardBackwardThreshold = 1.f, + Size gridStep = Size(6, 6), + InterpolationType interp_type = InterpolationType::INTERP_EPIC, + int epicK = 128, + float epicSigma = 0.05f, + float epicLambda = 999.0f, + int ricSPSize = 15, + int ricSLICType = 100, + bool use_post_proc = true, + float fgsLambda = 500.0f, + float fgsSigma = 1.5f, + bool use_variational_refinement = false); +}; + +/** @brief Class used for calculation sparse optical flow and feature tracking with robust local optical flow (RLOF) algorithms. +* +* The RLOF is a fast local optical flow approach described in @cite Senst2012 @cite Senst2013 @cite Senst2014 + * and @cite Senst2016 similar to the pyramidal iterative Lucas-Kanade method as +* proposed by @cite Bouguet00. More details and experiments can be found in the following thesis @cite Senst2019. +* The implementation is derived from optflow::calcOpticalFlowPyrLK(). +* +* For the RLOF configuration see optflow::RLOFOpticalFlowParameter for further details. +* Parameters have been described in @cite Senst2012, @cite Senst2013, @cite Senst2014 and @cite Senst2016. +* +* @note SIMD parallelization is only available when compiling with SSE4.1. +* @see optflow::calcOpticalFlowSparseRLOF(), optflow::RLOFOpticalFlowParameter +*/ +class CV_EXPORTS_W SparseRLOFOpticalFlow : public SparseOpticalFlow +{ +public: + /** @copydoc DenseRLOFOpticalFlow::setRLOFOpticalFlowParameter + */ + CV_WRAP virtual void setRLOFOpticalFlowParameter(Ptr val) = 0; + /** @copybrief setRLOFOpticalFlowParameter + * @see setRLOFOpticalFlowParameter + */ + CV_WRAP virtual Ptr getRLOFOpticalFlowParameter() const = 0; + //! @brief Threshold for the forward backward confidence check + /** For each feature point a motion vector \f$ d_{I0,I1}(\mathbf{x}) \f$ is computed. + * If the forward backward error \f[ EP_{FB} = || d_{I0,I1} + d_{I1,I0} || \f] + * is larger than threshold given by this function then the status will not be used by the following + * vector field interpolation. \f$ d_{I1,I0} \f$ denotes the backward flow. Note, the forward backward test + * will only be applied if the threshold > 0. This may results into a doubled runtime for the motion estimation. + * @see setForwardBackward + */ + CV_WRAP virtual void setForwardBackward(float val) = 0; + /** @copybrief setForwardBackward + * @see setForwardBackward + */ + CV_WRAP virtual float getForwardBackward() const = 0; + + //! @brief Creates instance of SparseRLOFOpticalFlow + /** + * @param rlofParam see setRLOFOpticalFlowParameter + * @param forwardBackwardThreshold see setForwardBackward + */ + CV_WRAP static Ptr create( + Ptr rlofParam = Ptr(), + float forwardBackwardThreshold = 1.f); + +}; + +/** @brief Fast dense optical flow computation based on robust local optical flow (RLOF) algorithms and sparse-to-dense interpolation scheme. + * + * The RLOF is a fast local optical flow approach described in @cite Senst2012 @cite Senst2013 @cite Senst2014 + * and @cite Senst2016 similar to the pyramidal iterative Lucas-Kanade method as + * proposed by @cite Bouguet00. More details and experiments can be found in the following thesis @cite Senst2019. + * The implementation is derived from optflow::calcOpticalFlowPyrLK(). + * + * The sparse-to-dense interpolation scheme allows for fast computation of dense optical flow using RLOF (see @cite Geistert2016). + * For this scheme the following steps are applied: + * -# motion vector seeded at a regular sampled grid are computed. The sparsity of this grid can be configured with setGridStep + * -# (optinally) errornous motion vectors are filter based on the forward backward confidence. The threshold can be configured + * with setForwardBackward. The filter is only applied if the threshold >0 but than the runtime is doubled due to the estimation + * of the backward flow. + * -# Vector field interpolation is applied to the motion vector set to obtain a dense vector field. + * + * @param I0 first 8-bit input image. If The cross-based RLOF is used (by selecting optflow::RLOFOpticalFlowParameter::supportRegionType + * = SupportRegionType::SR_CROSS) image has to be a 8-bit 3 channel image. + * @param I1 second 8-bit input image. If The cross-based RLOF is used (by selecting optflow::RLOFOpticalFlowParameter::supportRegionType + * = SupportRegionType::SR_CROSS) image has to be a 8-bit 3 channel image. + * @param flow computed flow image that has the same size as I0 and type CV_32FC2. + * @param rlofParam see optflow::RLOFOpticalFlowParameter + * @param forwardBackwardThreshold Threshold for the forward backward confidence check. + * For each grid point \f$ \mathbf{x} \f$ a motion vector \f$ d_{I0,I1}(\mathbf{x}) \f$ is computed. + * If the forward backward error \f[ EP_{FB} = || d_{I0,I1} + d_{I1,I0} || \f] + * is larger than threshold given by this function then the motion vector will not be used by the following + * vector field interpolation. \f$ d_{I1,I0} \f$ denotes the backward flow. Note, the forward backward test + * will only be applied if the threshold > 0. This may results into a doubled runtime for the motion estimation. + * @param gridStep Size of the grid to spawn the motion vectors. For each grid point a motion vector is computed. + * Some motion vectors will be removed due to the forwatd backward threshold (if set >0). The rest will be the + * base of the vector field interpolation. + * @param interp_type interpolation method used to compute the dense optical flow. Two interpolation algorithms are + * supported: + * - **INTERP_GEO** applies the fast geodesic interpolation, see @cite Geistert2016. + * - **INTERP_EPIC_RESIDUAL** applies the edge-preserving interpolation, see @cite Revaud2015,Geistert2016. + * @param epicK see ximgproc::EdgeAwareInterpolator sets the respective parameter. + * @param epicSigma see ximgproc::EdgeAwareInterpolator sets the respective parameter. + * @param epicLambda see ximgproc::EdgeAwareInterpolator sets the respective parameter. + * @param ricSPSize see ximgproc::RICInterpolator sets the respective parameter. + * @param ricSLICType see ximgproc::RICInterpolator sets the respective parameter. + * @param use_post_proc enables ximgproc::fastGlobalSmootherFilter() parameter. + * @param fgsLambda sets the respective ximgproc::fastGlobalSmootherFilter() parameter. + * @param fgsSigma sets the respective ximgproc::fastGlobalSmootherFilter() parameter. + * @param use_variational_refinement enables VariationalRefinement + * + * Parameters have been described in @cite Senst2012, @cite Senst2013, @cite Senst2014, @cite Senst2016. + * For the RLOF configuration see optflow::RLOFOpticalFlowParameter for further details. + * @note If the grid size is set to (1,1) and the forward backward threshold <= 0 that the dense optical flow field is purely + * computed with the RLOF. + * + * @note SIMD parallelization is only available when compiling with SSE4.1. + * + * @sa optflow::DenseRLOFOpticalFlow, optflow::RLOFOpticalFlowParameter +*/ +CV_EXPORTS_W void calcOpticalFlowDenseRLOF(InputArray I0, InputArray I1, InputOutputArray flow, + Ptr rlofParam = Ptr(), + float forwardBackwardThreshold = 0, Size gridStep = Size(6, 6), + InterpolationType interp_type = InterpolationType::INTERP_EPIC, + int epicK = 128, float epicSigma = 0.05f, float epicLambda = 100.f, + int ricSPSize = 15, int ricSLICType = 100, + bool use_post_proc = true, float fgsLambda = 500.0f, float fgsSigma = 1.5f, + bool use_variational_refinement = false); + +/** @brief Calculates fast optical flow for a sparse feature set using the robust local optical flow (RLOF) similar +* to optflow::calcOpticalFlowPyrLK(). +* +* The RLOF is a fast local optical flow approach described in @cite Senst2012 @cite Senst2013 @cite Senst2014 + * and @cite Senst2016 similar to the pyramidal iterative Lucas-Kanade method as +* proposed by @cite Bouguet00. More details and experiments can be found in the following thesis @cite Senst2019. +* The implementation is derived from optflow::calcOpticalFlowPyrLK(). +* +* @param prevImg first 8-bit input image. If The cross-based RLOF is used (by selecting optflow::RLOFOpticalFlowParameter::supportRegionType +* = SupportRegionType::SR_CROSS) image has to be a 8-bit 3 channel image. +* @param nextImg second 8-bit input image. If The cross-based RLOF is used (by selecting optflow::RLOFOpticalFlowParameter::supportRegionType +* = SupportRegionType::SR_CROSS) image has to be a 8-bit 3 channel image. +* @param prevPts vector of 2D points for which the flow needs to be found; point coordinates must be single-precision +* floating-point numbers. +* @param nextPts output vector of 2D points (with single-precision floating-point coordinates) containing the calculated +* new positions of input features in the second image; when optflow::RLOFOpticalFlowParameter::useInitialFlow variable is true the vector must +* have the same size as in the input and contain the initialization point correspondences. +* @param status output status vector (of unsigned chars); each element of the vector is set to 1 if the flow for the +* corresponding features has passed the forward backward check. +* @param err output vector of errors; each element of the vector is set to the forward backward error for the corresponding feature. +* @param rlofParam see optflow::RLOFOpticalFlowParameter +* @param forwardBackwardThreshold Threshold for the forward backward confidence check. If forewardBackwardThreshold <=0 the forward +* +* @note SIMD parallelization is only available when compiling with SSE4.1. +* +* Parameters have been described in @cite Senst2012, @cite Senst2013, @cite Senst2014 and @cite Senst2016. +* For the RLOF configuration see optflow::RLOFOpticalFlowParameter for further details. +*/ +CV_EXPORTS_W void calcOpticalFlowSparseRLOF(InputArray prevImg, InputArray nextImg, + InputArray prevPts, InputOutputArray nextPts, + OutputArray status, OutputArray err, + Ptr rlofParam = Ptr(), + float forwardBackwardThreshold = 0); + +//! Additional interface to the Dense RLOF algorithm - optflow::calcOpticalFlowDenseRLOF() +CV_EXPORTS_W Ptr createOptFlow_DenseRLOF(); + +//! Additional interface to the Sparse RLOF algorithm - optflow::calcOpticalFlowSparseRLOF() +CV_EXPORTS_W Ptr createOptFlow_SparseRLOF(); +//! @} + +} // namespace +} // namespace +#endif diff --git a/IPL/include/opencv/opencv2/optflow/sparse_matching_gpc.hpp b/IPL/include/opencv/opencv2/optflow/sparse_matching_gpc.hpp new file mode 100644 index 0000000..5256534 --- /dev/null +++ b/IPL/include/opencv/opencv2/optflow/sparse_matching_gpc.hpp @@ -0,0 +1,372 @@ +/* +By downloading, copying, installing or using the software you agree to this +license. If you do not agree to this license, do not download, install, +copy or use the software. + + + License Agreement + For Open Source Computer Vision Library + (3-clause BSD License) + +Copyright (C) 2016, OpenCV Foundation, all rights reserved. +Third party copyrights are property of their respective owners. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the names of the copyright holders nor the names of the contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +This software is provided by the copyright holders and contributors "as is" and +any express or implied warranties, including, but not limited to, the implied +warranties of merchantability and fitness for a particular purpose are +disclaimed. In no event shall copyright holders or contributors be liable for +any direct, indirect, incidental, special, exemplary, or consequential damages +(including, but not limited to, procurement of substitute goods or services; +loss of use, data, or profits; or business interruption) however caused +and on any theory of liability, whether in contract, strict liability, +or tort (including negligence or otherwise) arising in any way out of +the use of this software, even if advised of the possibility of such damage. +*/ + +/** + * @file sparse_matching_gpc.hpp + * @author Vladislav Samsonov + * @brief Implementation of the Global Patch Collider. + * + * Implementation of the Global Patch Collider algorithm from the following paper: + * http://research.microsoft.com/en-us/um/people/pkohli/papers/wfrik_cvpr2016.pdf + * + * @cite Wang_2016_CVPR + */ + +#ifndef __OPENCV_OPTFLOW_SPARSE_MATCHING_GPC_HPP__ +#define __OPENCV_OPTFLOW_SPARSE_MATCHING_GPC_HPP__ + +#include "opencv2/core.hpp" +#include "opencv2/imgproc.hpp" + +namespace cv +{ +namespace optflow +{ + +//! @addtogroup optflow +//! @{ + +struct CV_EXPORTS_W GPCPatchDescriptor +{ + static const unsigned nFeatures = 18; //!< number of features in a patch descriptor + Vec< double, nFeatures > feature; + + double dot( const Vec< double, nFeatures > &coef ) const; + + void markAsSeparated() { feature[0] = std::numeric_limits< double >::quiet_NaN(); } + + bool isSeparated() const { return cvIsNaN( feature[0] ) != 0; } +}; + +struct CV_EXPORTS_W GPCPatchSample +{ + GPCPatchDescriptor ref; + GPCPatchDescriptor pos; + GPCPatchDescriptor neg; + + void getDirections( bool &refdir, bool &posdir, bool &negdir, const Vec< double, GPCPatchDescriptor::nFeatures > &coef, double rhs ) const; +}; + +typedef std::vector< GPCPatchSample > GPCSamplesVector; + +/** @brief Descriptor types for the Global Patch Collider. + */ +enum GPCDescType +{ + GPC_DESCRIPTOR_DCT = 0, //!< Better quality but slow + GPC_DESCRIPTOR_WHT //!< Worse quality but much faster +}; + +/** @brief Class encapsulating training samples. + */ +class CV_EXPORTS_W GPCTrainingSamples +{ +private: + GPCSamplesVector samples; + int descriptorType; + +public: + /** @brief This function can be used to extract samples from a pair of images and a ground truth flow. + * Sizes of all the provided vectors must be equal. + */ + static Ptr< GPCTrainingSamples > create( const std::vector< String > &imagesFrom, const std::vector< String > &imagesTo, + const std::vector< String > >, int descriptorType ); + + static Ptr< GPCTrainingSamples > create( InputArrayOfArrays imagesFrom, InputArrayOfArrays imagesTo, InputArrayOfArrays gt, + int descriptorType ); + + size_t size() const { return samples.size(); } + + int type() const { return descriptorType; } + + operator GPCSamplesVector &() { return samples; } +}; + +/** @brief Class encapsulating training parameters. + */ +struct GPCTrainingParams +{ + unsigned maxTreeDepth; //!< Maximum tree depth to stop partitioning. + int minNumberOfSamples; //!< Minimum number of samples in the node to stop partitioning. + int descriptorType; //!< Type of descriptors to use. + bool printProgress; //!< Print progress to stdout. + + GPCTrainingParams( unsigned _maxTreeDepth = 20, int _minNumberOfSamples = 3, GPCDescType _descriptorType = GPC_DESCRIPTOR_DCT, + bool _printProgress = true ) + : maxTreeDepth( _maxTreeDepth ), minNumberOfSamples( _minNumberOfSamples ), descriptorType( _descriptorType ), + printProgress( _printProgress ) + { + CV_Assert( check() ); + } + + bool check() const { return maxTreeDepth > 1 && minNumberOfSamples > 1; } +}; + +/** @brief Class encapsulating matching parameters. + */ +struct GPCMatchingParams +{ + bool useOpenCL; //!< Whether to use OpenCL to speed up the matching. + + GPCMatchingParams( bool _useOpenCL = false ) : useOpenCL( _useOpenCL ) {} + + GPCMatchingParams( const GPCMatchingParams ¶ms ) : useOpenCL( params.useOpenCL ) {} +}; + +/** @brief Class for individual tree. + */ +class CV_EXPORTS_W GPCTree : public Algorithm +{ +public: + struct Node + { + Vec< double, GPCPatchDescriptor::nFeatures > coef; //!< Hyperplane coefficients + double rhs; //!< Bias term of the hyperplane + unsigned left; + unsigned right; + + bool operator==( const Node &n ) const { return coef == n.coef && rhs == n.rhs && left == n.left && right == n.right; } + }; + +private: + typedef GPCSamplesVector::iterator SIter; + + std::vector< Node > nodes; + GPCTrainingParams params; + + bool trainNode( size_t nodeId, SIter begin, SIter end, unsigned depth ); + +public: + void train( GPCTrainingSamples &samples, const GPCTrainingParams params = GPCTrainingParams() ); + + void write( FileStorage &fs ) const CV_OVERRIDE; + + void read( const FileNode &fn ) CV_OVERRIDE; + + unsigned findLeafForPatch( const GPCPatchDescriptor &descr ) const; + + static Ptr< GPCTree > create() { return makePtr< GPCTree >(); } + + bool operator==( const GPCTree &t ) const { return nodes == t.nodes; } + + int getDescriptorType() const { return params.descriptorType; } +}; + +template < int T > class GPCForest : public Algorithm +{ +private: + struct Trail + { + unsigned leaf[T]; //!< Inside which leaf of the tree 0..T the patch fell? + Point2i coord; //!< Patch coordinates. + + bool operator==( const Trail &trail ) const { return memcmp( leaf, trail.leaf, sizeof( leaf ) ) == 0; } + + bool operator<( const Trail &trail ) const + { + for ( int i = 0; i < T - 1; ++i ) + if ( leaf[i] != trail.leaf[i] ) + return leaf[i] < trail.leaf[i]; + return leaf[T - 1] < trail.leaf[T - 1]; + } + }; + + class ParallelTrailsFilling : public ParallelLoopBody + { + private: + const GPCForest *forest; + const std::vector< GPCPatchDescriptor > *descr; + std::vector< Trail > *trails; + + ParallelTrailsFilling &operator=( const ParallelTrailsFilling & ); + + public: + ParallelTrailsFilling( const GPCForest *_forest, const std::vector< GPCPatchDescriptor > *_descr, std::vector< Trail > *_trails ) + : forest( _forest ), descr( _descr ), trails( _trails ){}; + + void operator()( const Range &range ) const CV_OVERRIDE + { + for ( int t = range.start; t < range.end; ++t ) + for ( size_t i = 0; i < descr->size(); ++i ) + trails->at( i ).leaf[t] = forest->tree[t].findLeafForPatch( descr->at( i ) ); + } + }; + + GPCTree tree[T]; + +public: + /** @brief Train the forest using one sample set for every tree. + * Please, consider using the next method instead of this one for better quality. + */ + void train( GPCTrainingSamples &samples, const GPCTrainingParams params = GPCTrainingParams() ) + { + for ( int i = 0; i < T; ++i ) + tree[i].train( samples, params ); + } + + /** @brief Train the forest using individual samples for each tree. + * It is generally better to use this instead of the first method. + */ + void train( const std::vector< String > &imagesFrom, const std::vector< String > &imagesTo, const std::vector< String > >, + const GPCTrainingParams params = GPCTrainingParams() ) + { + for ( int i = 0; i < T; ++i ) + { + Ptr< GPCTrainingSamples > samples = + GPCTrainingSamples::create( imagesFrom, imagesTo, gt, params.descriptorType ); // Create training set for the tree + tree[i].train( *samples, params ); + } + } + + void train( InputArrayOfArrays imagesFrom, InputArrayOfArrays imagesTo, InputArrayOfArrays gt, + const GPCTrainingParams params = GPCTrainingParams() ) + { + for ( int i = 0; i < T; ++i ) + { + Ptr< GPCTrainingSamples > samples = + GPCTrainingSamples::create( imagesFrom, imagesTo, gt, params.descriptorType ); // Create training set for the tree + tree[i].train( *samples, params ); + } + } + + void write( FileStorage &fs ) const CV_OVERRIDE + { + fs << "ntrees" << T << "trees" + << "["; + for ( int i = 0; i < T; ++i ) + { + fs << "{"; + tree[i].write( fs ); + fs << "}"; + } + fs << "]"; + } + + void read( const FileNode &fn ) CV_OVERRIDE + { + CV_Assert( T <= (int)fn["ntrees"] ); + FileNodeIterator it = fn["trees"].begin(); + for ( int i = 0; i < T; ++i, ++it ) + tree[i].read( *it ); + } + + /** @brief Find correspondences between two images. + * @param[in] imgFrom First image in a sequence. + * @param[in] imgTo Second image in a sequence. + * @param[out] corr Output vector with pairs of corresponding points. + * @param[in] params Additional matching parameters for fine-tuning. + */ + void findCorrespondences( InputArray imgFrom, InputArray imgTo, std::vector< std::pair< Point2i, Point2i > > &corr, + const GPCMatchingParams params = GPCMatchingParams() ) const; + + static Ptr< GPCForest > create() { return makePtr< GPCForest >(); } +}; + +class CV_EXPORTS_W GPCDetails +{ +public: + static void dropOutliers( std::vector< std::pair< Point2i, Point2i > > &corr ); + + static void getAllDescriptorsForImage( const Mat *imgCh, std::vector< GPCPatchDescriptor > &descr, const GPCMatchingParams &mp, + int type ); + + static void getCoordinatesFromIndex( size_t index, Size sz, int &x, int &y ); +}; + +template < int T > +void GPCForest< T >::findCorrespondences( InputArray imgFrom, InputArray imgTo, std::vector< std::pair< Point2i, Point2i > > &corr, + const GPCMatchingParams params ) const +{ + CV_Assert( imgFrom.channels() == 3 ); + CV_Assert( imgTo.channels() == 3 ); + + Mat from, to; + imgFrom.getMat().convertTo( from, CV_32FC3 ); + imgTo.getMat().convertTo( to, CV_32FC3 ); + cvtColor( from, from, COLOR_BGR2YCrCb ); + cvtColor( to, to, COLOR_BGR2YCrCb ); + + Mat fromCh[3], toCh[3]; + split( from, fromCh ); + split( to, toCh ); + + std::vector< GPCPatchDescriptor > descr; + GPCDetails::getAllDescriptorsForImage( fromCh, descr, params, tree[0].getDescriptorType() ); + std::vector< Trail > trailsFrom( descr.size() ), trailsTo( descr.size() ); + + for ( size_t i = 0; i < descr.size(); ++i ) + GPCDetails::getCoordinatesFromIndex( i, from.size(), trailsFrom[i].coord.x, trailsFrom[i].coord.y ); + parallel_for_( Range( 0, T ), ParallelTrailsFilling( this, &descr, &trailsFrom ) ); + + descr.clear(); + GPCDetails::getAllDescriptorsForImage( toCh, descr, params, tree[0].getDescriptorType() ); + + for ( size_t i = 0; i < descr.size(); ++i ) + GPCDetails::getCoordinatesFromIndex( i, to.size(), trailsTo[i].coord.x, trailsTo[i].coord.y ); + parallel_for_( Range( 0, T ), ParallelTrailsFilling( this, &descr, &trailsTo ) ); + + std::sort( trailsFrom.begin(), trailsFrom.end() ); + std::sort( trailsTo.begin(), trailsTo.end() ); + + for ( size_t i = 0; i < trailsFrom.size(); ++i ) + { + bool uniq = true; + while ( i + 1 < trailsFrom.size() && trailsFrom[i] == trailsFrom[i + 1] ) + ++i, uniq = false; + if ( uniq ) + { + typename std::vector< Trail >::const_iterator lb = std::lower_bound( trailsTo.begin(), trailsTo.end(), trailsFrom[i] ); + if ( lb != trailsTo.end() && *lb == trailsFrom[i] && ( ( lb + 1 ) == trailsTo.end() || !( *lb == *( lb + 1 ) ) ) ) + corr.push_back( std::make_pair( trailsFrom[i].coord, lb->coord ) ); + } + } + + GPCDetails::dropOutliers( corr ); +} + +//! @} + +} // namespace optflow + +CV_EXPORTS void write( FileStorage &fs, const String &name, const optflow::GPCTree::Node &node ); + +CV_EXPORTS void read( const FileNode &fn, optflow::GPCTree::Node &node, optflow::GPCTree::Node ); +} // namespace cv + +#endif diff --git a/IPL/include/opencv/opencv2/phase_unwrapping.hpp b/IPL/include/opencv/opencv2/phase_unwrapping.hpp new file mode 100644 index 0000000..0e15e71 --- /dev/null +++ b/IPL/include/opencv/opencv2/phase_unwrapping.hpp @@ -0,0 +1,61 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// + // + // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. + // + // By downloading, copying, installing or using the software you agree to this license. + // If you do not agree to this license, do not download, install, + // copy or use the software. + // + // + // License Agreement + // For Open Source Computer Vision Library + // + // Copyright (C) 2015, OpenCV Foundation, all rights reserved. + // Third party copyrights are property of their respective owners. + // + // Redistribution and use in source and binary forms, with or without modification, + // are permitted provided that the following conditions are met: + // + // * Redistribution's of source code must retain the above copyright notice, + // this list of conditions and the following disclaimer. + // + // * Redistribution's in binary form must reproduce the above copyright notice, + // this list of conditions and the following disclaimer in the documentation + // and/or other materials provided with the distribution. + // + // * The name of the copyright holders may not be used to endorse or promote products + // derived from this software without specific prior written permission. + // + // This software is provided by the copyright holders and contributors "as is" and + // any express or implied warranties, including, but not limited to, the implied + // warranties of merchantability and fitness for a particular purpose are disclaimed. + // In no event shall the Intel Corporation or contributors be liable for any direct, + // indirect, incidental, special, exemplary, or consequential damages + // (including, but not limited to, procurement of substitute goods or services; + // loss of use, data, or profits; or business interruption) however caused + // and on any theory of liability, whether in contract, strict liability, + // or tort (including negligence or otherwise) arising in any way out of + // the use of this software, even if advised of the possibility of such damage. + // + //M*/ + +#include "opencv2/phase_unwrapping/phase_unwrapping.hpp" +#include "opencv2/phase_unwrapping/histogramphaseunwrapping.hpp" + +/** @defgroup phase_unwrapping Phase Unwrapping API + +Two-dimensional phase unwrapping is found in different applications like terrain elevation estimation +in synthetic aperture radar (SAR), field mapping in magnetic resonance imaging or as a way of finding +corresponding pixels in structured light reconstruction with sinusoidal patterns. + +Given a phase map, wrapped between [-pi; pi], phase unwrapping aims at finding the "true" phase map +by adding the right number of 2*pi to each pixel. + +The problem is straightforward for perfect wrapped phase map, but real data are usually not noise-free. +Among the different algorithms that were developed, quality-guided phase unwrapping methods are fast +and efficient. They follow a path that unwraps high quality pixels first, +avoiding error propagation from the start. + +In this module, a quality-guided phase unwrapping is implemented following the approach described in @cite histogramUnwrapping . + +*/ \ No newline at end of file diff --git a/IPL/include/opencv/opencv2/phase_unwrapping/histogramphaseunwrapping.hpp b/IPL/include/opencv/opencv2/phase_unwrapping/histogramphaseunwrapping.hpp new file mode 100644 index 0000000..177cb43 --- /dev/null +++ b/IPL/include/opencv/opencv2/phase_unwrapping/histogramphaseunwrapping.hpp @@ -0,0 +1,108 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// + // + // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. + // + // By downloading, copying, installing or using the software you agree to this license. + // If you do not agree to this license, do not download, install, + // copy or use the software. + // + // + // License Agreement + // For Open Source Computer Vision Library + // + // Copyright (C) 2015, OpenCV Foundation, all rights reserved. + // Third party copyrights are property of their respective owners. + // + // Redistribution and use in source and binary forms, with or without modification, + // are permitted provided that the following conditions are met: + // + // * Redistribution's of source code must retain the above copyright notice, + // this list of conditions and the following disclaimer. + // + // * Redistribution's in binary form must reproduce the above copyright notice, + // this list of conditions and the following disclaimer in the documentation + // and/or other materials provided with the distribution. + // + // * The name of the copyright holders may not be used to endorse or promote products + // derived from this software without specific prior written permission. + // + // This software is provided by the copyright holders and contributors "as is" and + // any express or implied warranties, including, but not limited to, the implied + // warranties of merchantability and fitness for a particular purpose are disclaimed. + // In no event shall the Intel Corporation or contributors be liable for any direct, + // indirect, incidental, special, exemplary, or consequential damages + // (including, but not limited to, procurement of substitute goods or services; + // loss of use, data, or profits; or business interruption) however caused + // and on any theory of liability, whether in contract, strict liability, + // or tort (including negligence or otherwise) arising in any way out of + // the use of this software, even if advised of the possibility of such damage. + // + //M*/ + +#ifndef __OPENCV_HISTOGRAM_PHASE_UNWRAPPING_HPP__ +#define __OPENCV_HISTOGRAM_PHASE_UNWRAPPING_HPP__ + +#include "opencv2/core.hpp" +#include +#include "opencv2/phase_unwrapping/phase_unwrapping.hpp" + +namespace cv { +namespace phase_unwrapping { +//! @addtogroup phase_unwrapping +//! @{ + + /** @brief Class implementing two-dimensional phase unwrapping based on @cite histogramUnwrapping + * This algorithm belongs to the quality-guided phase unwrapping methods. + * First, it computes a reliability map from second differences between a pixel and its eight neighbours. + * Reliability values lie between 0 and 16*pi*pi. Then, this reliability map is used to compute + * the reliabilities of "edges". An edge is an entity defined by two pixels that are connected + * horizontally or vertically. Its reliability is found by adding the the reliabilities of the + * two pixels connected through it. Edges are sorted in a histogram based on their reliability values. + * This histogram is then used to unwrap pixels, starting from the highest quality pixel. + + * The wrapped phase map and the unwrapped result are stored in CV_32FC1 Mat. + */ +class CV_EXPORTS_W HistogramPhaseUnwrapping : public PhaseUnwrapping +{ + +public: + /** + * @brief Parameters of phaseUnwrapping constructor. + + * @param width Phase map width. + * @param height Phase map height. + * @param histThresh Bins in the histogram are not of equal size. Default value is 3*pi*pi. The one before "histThresh" value are smaller. + * @param nbrOfSmallBins Number of bins between 0 and "histThresh". Default value is 10. + * @param nbrOfLargeBins Number of bins between "histThresh" and 32*pi*pi (highest edge reliability value). Default value is 5. + */ + struct CV_EXPORTS_W_SIMPLE Params + { + CV_WRAP Params(); + CV_PROP_RW int width; + CV_PROP_RW int height; + CV_PROP_RW float histThresh; + CV_PROP_RW int nbrOfSmallBins; + CV_PROP_RW int nbrOfLargeBins; + }; + /** + * @brief Constructor + + * @param parameters HistogramPhaseUnwrapping parameters HistogramPhaseUnwrapping::Params: width,height of the phase map and histogram characteristics. + */ + CV_WRAP + static Ptr create( const HistogramPhaseUnwrapping::Params ¶meters = + HistogramPhaseUnwrapping::Params() ); + + /** + * @brief Get the reliability map computed from the wrapped phase map. + + * @param reliabilityMap Image where the reliability map is stored. + */ + CV_WRAP + virtual void getInverseReliabilityMap( OutputArray reliabilityMap ) = 0; +}; + +//! @} +} +} +#endif \ No newline at end of file diff --git a/IPL/include/opencv/opencv2/phase_unwrapping/phase_unwrapping.hpp b/IPL/include/opencv/opencv2/phase_unwrapping/phase_unwrapping.hpp new file mode 100644 index 0000000..5b5cb51 --- /dev/null +++ b/IPL/include/opencv/opencv2/phase_unwrapping/phase_unwrapping.hpp @@ -0,0 +1,74 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// + // + // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. + // + // By downloading, copying, installing or using the software you agree to this license. + // If you do not agree to this license, do not download, install, + // copy or use the software. + // + // + // License Agreement + // For Open Source Computer Vision Library + // + // Copyright (C) 2015, OpenCV Foundation, all rights reserved. + // Third party copyrights are property of their respective owners. + // + // Redistribution and use in source and binary forms, with or without modification, + // are permitted provided that the following conditions are met: + // + // * Redistribution's of source code must retain the above copyright notice, + // this list of conditions and the following disclaimer. + // + // * Redistribution's in binary form must reproduce the above copyright notice, + // this list of conditions and the following disclaimer in the documentation + // and/or other materials provided with the distribution. + // + // * The name of the copyright holders may not be used to endorse or promote products + // derived from this software without specific prior written permission. + // + // This software is provided by the copyright holders and contributors "as is" and + // any express or implied warranties, including, but not limited to, the implied + // warranties of merchantability and fitness for a particular purpose are disclaimed. + // In no event shall the Intel Corporation or contributors be liable for any direct, + // indirect, incidental, special, exemplary, or consequential damages + // (including, but not limited to, procurement of substitute goods or services; + // loss of use, data, or profits; or business interruption) however caused + // and on any theory of liability, whether in contract, strict liability, + // or tort (including negligence or otherwise) arising in any way out of + // the use of this software, even if advised of the possibility of such damage. + // + //M*/ + +#ifndef __OPENCV_PHASE_UNWRAPPING_HPP__ +#define __OPENCV_PHASE_UNWRAPPING_HPP__ + +#include "opencv2/core.hpp" + +namespace cv { +namespace phase_unwrapping { +//! @addtogroup phase_unwrapping +//! @{ + + /** + @brief Abstract base class for phase unwrapping. + */ +class CV_EXPORTS_W PhaseUnwrapping : public virtual Algorithm +{ +public: + /** + * @brief Unwraps a 2D phase map. + + * @param wrappedPhaseMap The wrapped phase map that needs to be unwrapped. + * @param unwrappedPhaseMap The unwrapped phase map. + * @param shadowMask Optional parameter used when some pixels do not hold any phase information in the wrapped phase map. + */ + CV_WRAP + virtual void unwrapPhaseMap( InputArray wrappedPhaseMap, OutputArray unwrappedPhaseMap, + InputArray shadowMask = noArray() ) = 0; + +}; + +//! @} +} +} +#endif \ No newline at end of file diff --git a/IPL/include/opencv/opencv2/photo.hpp b/IPL/include/opencv/opencv2/photo.hpp index c093f65..c2e89a3 100644 --- a/IPL/include/opencv/opencv2/photo.hpp +++ b/IPL/include/opencv/opencv2/photo.hpp @@ -40,23 +40,44 @@ // //M*/ -#ifndef __OPENCV_PHOTO_HPP__ -#define __OPENCV_PHOTO_HPP__ +#ifndef OPENCV_PHOTO_HPP +#define OPENCV_PHOTO_HPP #include "opencv2/core.hpp" #include "opencv2/imgproc.hpp" /** @defgroup photo Computational Photography + +This module includes photo processing algorithms @{ + @defgroup photo_inpaint Inpainting @defgroup photo_denoise Denoising @defgroup photo_hdr HDR imaging This section describes high dynamic range imaging algorithms namely tonemapping, exposure alignment, camera calibration with multiple exposures and exposure fusion. + @defgroup photo_decolor Contrast Preserving Decolorization + +Useful links: + +http://www.cse.cuhk.edu.hk/leojia/projects/color2gray/index.html + @defgroup photo_clone Seamless Cloning + +Useful links: + +https://www.learnopencv.com/seamless-cloning-using-opencv-python-cpp + @defgroup photo_render Non-Photorealistic Rendering + +Useful links: + +http://www.inf.ufrgs.br/~eslgastal/DomainTransform + +https://www.learnopencv.com/non-photorealistic-rendering-using-opencv-python-c/ + @defgroup photo_c C API @} */ @@ -67,37 +88,24 @@ namespace cv //! @addtogroup photo //! @{ +//! @addtogroup photo_inpaint +//! @{ //! the inpainting algorithm enum { - INPAINT_NS = 0, // Navier-Stokes algorithm - INPAINT_TELEA = 1 // A. Telea algorithm -}; - -enum -{ - NORMAL_CLONE = 1, - MIXED_CLONE = 2, - MONOCHROME_TRANSFER = 3 -}; - -enum -{ - RECURS_FILTER = 1, - NORMCONV_FILTER = 2 + INPAINT_NS = 0, //!< Use Navier-Stokes based method + INPAINT_TELEA = 1 //!< Use the algorithm proposed by Alexandru Telea @cite Telea04 }; /** @brief Restores the selected region in an image using the region neighborhood. -@param src Input 8-bit 1-channel or 3-channel image. +@param src Input 8-bit, 16-bit unsigned or 32-bit float 1-channel or 8-bit 3-channel image. @param inpaintMask Inpainting mask, 8-bit 1-channel image. Non-zero pixels indicate the area that needs to be inpainted. @param dst Output image with the same size and type as src . @param inpaintRadius Radius of a circular neighborhood of each point inpainted that is considered by the algorithm. -@param flags Inpainting method that could be one of the following: -- **INPAINT_NS** Navier-Stokes based method [Navier01] -- **INPAINT_TELEA** Method by Alexandru Telea @cite Telea04 . +@param flags Inpainting method that could be cv::INPAINT_NS or cv::INPAINT_TELEA The function reconstructs the selected image area from the pixel near the area boundary. The function may be used to remove dust and scratches from a scanned photo, or to remove undesirable @@ -106,12 +114,14 @@ objects from still images or video. See @@ -216,7 +226,7 @@ CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputA int imgToDenoiseIndex, int temporalWindowSize, float h = 3, int templateWindowSize = 7, int searchWindowSize = 21); -/** @brief Modification of fastNlMeansDenoising function for images sequence where consequtive images have been +/** @brief Modification of fastNlMeansDenoising function for images sequence where consecutive images have been captured in small period of time. For example video. This version of the function is for grayscale images or for manual manipulation with colorspaces. For more details see @@ -328,8 +338,8 @@ class CV_EXPORTS_W Tonemap : public Algorithm public: /** @brief Tonemaps image - @param src source image - 32-bit 3-channel Mat - @param dst destination image - 32-bit 3-channel Mat with values in [0, 1] range + @param src source image - CV_32FC3 Mat (float 32 bits 3 channels) + @param dst destination image - CV_32FC3 Mat with values in [0, 1] range */ CV_WRAP virtual void process(InputArray src, OutputArray dst) = 0; @@ -376,43 +386,6 @@ results, default value is 0.85. */ CV_EXPORTS_W Ptr createTonemapDrago(float gamma = 1.0f, float saturation = 1.0f, float bias = 0.85f); -/** @brief This algorithm decomposes image into two layers: base layer and detail layer using bilateral filter -and compresses contrast of the base layer thus preserving all the details. - -This implementation uses regular bilateral filter from opencv. - -Saturation enhancement is possible as in ocvTonemapDrago. - -For more information see @cite DD02 . - */ -class CV_EXPORTS_W TonemapDurand : public Tonemap -{ -public: - - CV_WRAP virtual float getSaturation() const = 0; - CV_WRAP virtual void setSaturation(float saturation) = 0; - - CV_WRAP virtual float getContrast() const = 0; - CV_WRAP virtual void setContrast(float contrast) = 0; - - CV_WRAP virtual float getSigmaSpace() const = 0; - CV_WRAP virtual void setSigmaSpace(float sigma_space) = 0; - - CV_WRAP virtual float getSigmaColor() const = 0; - CV_WRAP virtual void setSigmaColor(float sigma_color) = 0; -}; - -/** @brief Creates TonemapDurand object - -@param gamma gamma value for gamma correction. See createTonemap -@param contrast resulting contrast on logarithmic scale, i. e. log(max / min), where max and min -are maximum and minimum luminance values of the resulting image. -@param saturation saturation enhancement value. See createTonemapDrago -@param sigma_space bilateral filter sigma in color space -@param sigma_color bilateral filter sigma in coordinate space - */ -CV_EXPORTS_W Ptr -createTonemapDurand(float gamma = 1.0f, float contrast = 4.0f, float saturation = 1.0f, float sigma_space = 2.0f, float sigma_color = 2.0f); /** @brief This is a global tonemapping operator that models human visual system. @@ -502,7 +475,7 @@ class CV_EXPORTS_W AlignMTB : public AlignExposures { public: CV_WRAP virtual void process(InputArrayOfArrays src, std::vector& dst, - InputArray times, InputArray response) = 0; + InputArray times, InputArray response) CV_OVERRIDE = 0; /** @brief Short version of process, that doesn't take extra arguments. @@ -591,7 +564,7 @@ class CV_EXPORTS_W CalibrateDebevec : public CalibrateCRF @param samples number of pixel locations to use @param lambda smoothness term weight. Greater values produce smoother results, but can alter the response. -@param random if true sample pixel locations are chosen at random, otherwise the form a +@param random if true sample pixel locations are chosen at random, otherwise they form a rectangular grid. */ CV_EXPORTS_W Ptr createCalibrateDebevec(int samples = 70, float lambda = 10.0f, bool random = false); @@ -646,7 +619,7 @@ class CV_EXPORTS_W MergeDebevec : public MergeExposures { public: CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst, - InputArray times, InputArray response) = 0; + InputArray times, InputArray response) CV_OVERRIDE = 0; CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst, InputArray times) = 0; }; @@ -669,7 +642,7 @@ class CV_EXPORTS_W MergeMertens : public MergeExposures { public: CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst, - InputArray times, InputArray response) = 0; + InputArray times, InputArray response) CV_OVERRIDE = 0; /** @brief Short version of process, that doesn't take extra arguments. @param src vector of input images @@ -705,7 +678,7 @@ class CV_EXPORTS_W MergeRobertson : public MergeExposures { public: CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst, - InputArray times, InputArray response) = 0; + InputArray times, InputArray response) CV_OVERRIDE = 0; CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst, InputArray times) = 0; }; @@ -715,6 +688,9 @@ CV_EXPORTS_W Ptr createMergeRobertson(); //! @} photo_hdr +//! @addtogroup photo_decolor +//! @{ + /** @brief Transforms a color image to a grayscale image. It is a basic tool in digital printing, stylized black-and-white photograph rendering, and in many single channel image processing applications @cite CL12 . @@ -727,9 +703,27 @@ This function is to be applied on color images. */ CV_EXPORTS_W void decolor( InputArray src, OutputArray grayscale, OutputArray color_boost); +//! @} photo_decolor + //! @addtogroup photo_clone //! @{ + +//! seamlessClone algorithm flags +enum +{ + /** The power of the method is fully expressed when inserting objects with complex outlines into a new background*/ + NORMAL_CLONE = 1, + /** The classic method, color-based selection and alpha masking might be time consuming and often leaves an undesirable + halo. Seamless cloning, even averaged with the original image, is not effective. Mixed seamless cloning based on a loose selection proves effective.*/ + MIXED_CLONE = 2, + /** Monochrome transfer allows the user to easily replace certain features of one object by alternative features.*/ + MONOCHROME_TRANSFER = 3}; + + +/** @example samples/cpp/tutorial_code/photo/seamless_cloning/cloning_demo.cpp +An example using seamlessClone function +*/ /** @brief Image editing tasks concern either global changes (color/intensity corrections, filters, deformations) or local changes concerned to a selection. Here we are interested in achieving local changes, ones that are restricted to a region manually selected (ROI), in a seamless and effortless @@ -741,15 +735,7 @@ content @cite PM03 . @param mask Input 8-bit 1 or 3-channel image. @param p Point in dst image where object is placed. @param blend Output image with the same size and type as dst. -@param flags Cloning method that could be one of the following: -- **NORMAL_CLONE** The power of the method is fully expressed when inserting objects with -complex outlines into a new background -- **MIXED_CLONE** The classic method, color-based selection and alpha masking might be time -consuming and often leaves an undesirable halo. Seamless cloning, even averaged with the -original image, is not effective. Mixed seamless cloning based on a loose selection proves -effective. -- **FEATURE_EXCHANGE** Feature exchange allows the user to easily replace certain features of -one object by alternative features. +@param flags Cloning method that could be cv::NORMAL_CLONE, cv::MIXED_CLONE or cv::MONOCHROME_TRANSFER */ CV_EXPORTS_W void seamlessClone( InputArray src, InputArray dst, InputArray mask, Point p, OutputArray blend, int flags); @@ -784,18 +770,16 @@ CV_EXPORTS_W void illuminationChange(InputArray src, InputArray mask, OutputArra float alpha = 0.2f, float beta = 0.4f); /** @brief By retaining only the gradients at edge locations, before integrating with the Poisson solver, one -washes out the texture of the selected region, giving its contents a flat aspect. Here Canny Edge -Detector is used. +washes out the texture of the selected region, giving its contents a flat aspect. Here Canny Edge %Detector is used. @param src Input 8-bit 3-channel image. @param mask Input 8-bit 1 or 3-channel image. @param dst Output image with the same size and type as src. -@param low_threshold Range from 0 to 100. +@param low_threshold %Range from 0 to 100. @param high_threshold Value \> 100. @param kernel_size The size of the Sobel kernel to be used. -**NOTE:** - +@note The algorithm assumes that the color of the source image is close to that of the destination. This assumption means that when the colors don't match, the source image color gets tinted toward the color of the destination image. @@ -809,16 +793,21 @@ CV_EXPORTS_W void textureFlattening(InputArray src, InputArray mask, OutputArray //! @addtogroup photo_render //! @{ +//! Edge preserving filters +enum +{ + RECURS_FILTER = 1, //!< Recursive Filtering + NORMCONV_FILTER = 2 //!< Normalized Convolution Filtering +}; + /** @brief Filtering is the fundamental operation in image and video processing. Edge-preserving smoothing filters are used in many different applications @cite EM11 . @param src Input 8-bit 3-channel image. @param dst Output 8-bit 3-channel image. -@param flags Edge preserving filters: -- **RECURS_FILTER** = 1 -- **NORMCONV_FILTER** = 2 -@param sigma_s Range between 0 to 200. -@param sigma_r Range between 0 to 1. +@param flags Edge preserving filters: cv::RECURS_FILTER or cv::NORMCONV_FILTER +@param sigma_s %Range between 0 to 200. +@param sigma_r %Range between 0 to 1. */ CV_EXPORTS_W void edgePreservingFilter(InputArray src, OutputArray dst, int flags = 1, float sigma_s = 60, float sigma_r = 0.4f); @@ -827,20 +816,23 @@ CV_EXPORTS_W void edgePreservingFilter(InputArray src, OutputArray dst, int flag @param src Input 8-bit 3-channel image. @param dst Output image with the same size and type as src. -@param sigma_s Range between 0 to 200. -@param sigma_r Range between 0 to 1. +@param sigma_s %Range between 0 to 200. +@param sigma_r %Range between 0 to 1. */ CV_EXPORTS_W void detailEnhance(InputArray src, OutputArray dst, float sigma_s = 10, float sigma_r = 0.15f); +/** @example samples/cpp/tutorial_code/photo/non_photorealistic_rendering/npr_demo.cpp +An example using non-photorealistic line drawing functions +*/ /** @brief Pencil-like non-photorealistic line drawing @param src Input 8-bit 3-channel image. @param dst1 Output 8-bit 1-channel image. @param dst2 Output image with the same size and type as src. -@param sigma_s Range between 0 to 200. -@param sigma_r Range between 0 to 1. -@param shade_factor Range between 0 to 0.1. +@param sigma_s %Range between 0 to 200. +@param sigma_r %Range between 0 to 1. +@param shade_factor %Range between 0 to 0.1. */ CV_EXPORTS_W void pencilSketch(InputArray src, OutputArray dst1, OutputArray dst2, float sigma_s = 60, float sigma_r = 0.07f, float shade_factor = 0.02f); @@ -851,8 +843,8 @@ contrast while preserving, or enhancing, high-contrast features. @param src Input 8-bit 3-channel image. @param dst Output image with the same size and type as src. -@param sigma_s Range between 0 to 200. -@param sigma_r Range between 0 to 1. +@param sigma_s %Range between 0 to 200. +@param sigma_r %Range between 0 to 1. */ CV_EXPORTS_W void stylization(InputArray src, OutputArray dst, float sigma_s = 60, float sigma_r = 0.45f); @@ -863,8 +855,4 @@ CV_EXPORTS_W void stylization(InputArray src, OutputArray dst, float sigma_s = 6 } // cv -#ifndef DISABLE_OPENCV_24_COMPATIBILITY -#include "opencv2/photo/photo_c.h" -#endif - #endif diff --git a/IPL/include/opencv/opencv2/photo/cuda.hpp b/IPL/include/opencv/opencv2/photo/cuda.hpp index aeac1fa..a2f3816 100644 --- a/IPL/include/opencv/opencv2/photo/cuda.hpp +++ b/IPL/include/opencv/opencv2/photo/cuda.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_PHOTO_CUDA_HPP__ -#define __OPENCV_PHOTO_CUDA_HPP__ +#ifndef OPENCV_PHOTO_CUDA_HPP +#define OPENCV_PHOTO_CUDA_HPP #include "opencv2/core/cuda.hpp" @@ -129,4 +129,4 @@ CV_EXPORTS void fastNlMeansDenoisingColored(InputArray src, OutputArray dst, }} // namespace cv { namespace cuda { -#endif /* __OPENCV_PHOTO_CUDA_HPP__ */ +#endif /* OPENCV_PHOTO_CUDA_HPP */ diff --git a/IPL/include/opencv/opencv2/photo/legacy/constants_c.h b/IPL/include/opencv/opencv2/photo/legacy/constants_c.h new file mode 100644 index 0000000..ec1d440 --- /dev/null +++ b/IPL/include/opencv/opencv2/photo/legacy/constants_c.h @@ -0,0 +1,14 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_PHOTO_LEGACY_CONSTANTS_H +#define OPENCV_PHOTO_LEGACY_CONSTANTS_H + +enum InpaintingModes +{ + CV_INPAINT_NS =0, + CV_INPAINT_TELEA =1 +}; + +#endif // OPENCV_PHOTO_LEGACY_CONSTANTS_H diff --git a/IPL/include/opencv/opencv2/plot.hpp b/IPL/include/opencv/opencv2/plot.hpp index 8243985..06a12a1 100644 --- a/IPL/include/opencv/opencv2/plot.hpp +++ b/IPL/include/opencv/opencv2/plot.hpp @@ -59,6 +59,9 @@ namespace cv { namespace plot { + //! @addtogroup plot + //! @{ + class CV_EXPORTS_W Plot2d : public Algorithm { public: @@ -68,17 +71,48 @@ namespace cv CV_WRAP virtual void setMaxX(double _plotMaxX) = 0; CV_WRAP virtual void setMaxY(double _plotMaxY) = 0; CV_WRAP virtual void setPlotLineWidth(int _plotLineWidth) = 0; + /** + * @brief Switches data visualization mode + * + * @param _needPlotLine if true then neighbour plot points will be connected by lines. + * In other case data will be plotted as a set of standalone points. + */ + CV_WRAP virtual void setNeedPlotLine(bool _needPlotLine) = 0; CV_WRAP virtual void setPlotLineColor(Scalar _plotLineColor) = 0; CV_WRAP virtual void setPlotBackgroundColor(Scalar _plotBackgroundColor) = 0; CV_WRAP virtual void setPlotAxisColor(Scalar _plotAxisColor) = 0; CV_WRAP virtual void setPlotGridColor(Scalar _plotGridColor) = 0; CV_WRAP virtual void setPlotTextColor(Scalar _plotTextColor) = 0; CV_WRAP virtual void setPlotSize(int _plotSizeWidth, int _plotSizeHeight) = 0; - CV_WRAP virtual void render(Mat &_plotResult) = 0; - }; + CV_WRAP virtual void setShowGrid(bool needShowGrid) = 0; + CV_WRAP virtual void setShowText(bool needShowText) = 0; + CV_WRAP virtual void setGridLinesNumber(int gridLinesNumber) = 0; + CV_WRAP virtual void setInvertOrientation(bool _invertOrientation) = 0; + /** + * @brief Sets the index of a point which coordinates will be printed on the top left corner of the plot (if ShowText flag is true). + * + * @param pointIdx index of the required point in data array. + */ + CV_WRAP virtual void setPointIdxToPrint(int pointIdx) = 0; + CV_WRAP virtual void render(OutputArray _plotResult) = 0; - CV_EXPORTS_W Ptr createPlot2d(Mat data); - CV_EXPORTS_W Ptr createPlot2d(Mat dataX, Mat dataY); + /** + * @brief Creates Plot2d object + * + * @param data \f$1xN\f$ or \f$Nx1\f$ matrix containing \f$Y\f$ values of points to plot. \f$X\f$ values + * will be equal to indexes of correspondind elements in data matrix. + */ + CV_WRAP static Ptr create(InputArray data); + + /** + * @brief Creates Plot2d object + * + * @param dataX \f$1xN\f$ or \f$Nx1\f$ matrix \f$X\f$ values of points to plot. + * @param dataY \f$1xN\f$ or \f$Nx1\f$ matrix containing \f$Y\f$ values of points to plot. + */ + CV_WRAP static Ptr create(InputArray dataX, InputArray dataY); + }; + //! @} } } diff --git a/IPL/include/opencv/opencv2/quality.hpp b/IPL/include/opencv/opencv2/quality.hpp new file mode 100644 index 0000000..8470f08 --- /dev/null +++ b/IPL/include/opencv/opencv2/quality.hpp @@ -0,0 +1,15 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_QUALITY_HPP +#define OPENCV_QUALITY_HPP + +#include "quality/qualitybase.hpp" +#include "quality/qualitymse.hpp" +#include "quality/qualitypsnr.hpp" +#include "quality/qualityssim.hpp" +#include "quality/qualitygmsd.hpp" +#include "quality/qualitybrisque.hpp" + +#endif \ No newline at end of file diff --git a/IPL/include/opencv/opencv2/quality/quality_utils.hpp b/IPL/include/opencv/opencv2/quality/quality_utils.hpp new file mode 100644 index 0000000..4b3df51 --- /dev/null +++ b/IPL/include/opencv/opencv2/quality/quality_utils.hpp @@ -0,0 +1,109 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_QUALITY_QUALITY_UTILS_HPP +#define OPENCV_QUALITY_QUALITY_UTILS_HPP + +#include "qualitybase.hpp" + +namespace cv +{ +namespace quality +{ +namespace quality_utils +{ + +// default type of matrix to expand to +static CV_CONSTEXPR const int EXPANDED_MAT_DEFAULT_TYPE = CV_32F; + +// convert inputarray to specified mat type. set type == -1 to preserve existing type +template +inline R extract_mat(InputArray in, const int type = -1) +{ + R result = {}; + if ( in.isMat() ) + in.getMat().convertTo( result, (type != -1) ? type : in.getMat().type()); + else if ( in.isUMat() ) + in.getUMat().convertTo( result, (type != -1) ? type : in.getUMat().type()); + else + CV_Error(Error::StsNotImplemented, "Unsupported input type"); + + return result; +} + +// extract and expand matrix to target type +template +inline R expand_mat( InputArray src, int TYPE_DEFAULT = EXPANDED_MAT_DEFAULT_TYPE) +{ + auto result = extract_mat(src, -1); + + // by default, expand to 32F unless we already have >= 32 bits, then go to 64 + // if/when we can detect OpenCL CV_16F support, opt for that when input depth == 8 + // note that this may impact the precision of the algorithms and would need testing + int type = TYPE_DEFAULT; + + switch (result.depth()) + { + case CV_32F: + case CV_32S: + case CV_64F: + type = CV_64F; + }; // switch + + result.convertTo(result, type); + return result; +} + +// return mat of observed min/max pair per column +// row 0: min per column +// row 1: max per column +// template +inline cv::Mat get_column_range( const cv::Mat& data ) +{ + CV_Assert(data.channels() == 1); + CV_Assert(data.rows > 0); + + cv::Mat result( cv::Size( data.cols, 2 ), data.type() ); + + auto + row_min = result.row(0) + , row_max = result.row(1) + ; + + // set initial min/max + data.row(0).copyTo(row_min); + data.row(0).copyTo(row_max); + + for (int y = 1; y < data.rows; ++y) + { + auto row = data.row(y); + cv::min(row,row_min, row_min); + cv::max(row, row_max, row_max); + } + return result; +} // get_column_range + +// linear scale of each column from min to max +// range is column-wise pair of observed min/max. See get_column_range +template +inline void scale( cv::Mat& mat, const cv::Mat& range, const T min, const T max ) +{ + // value = lower + (upper - lower) * (value - feature_min[index]) / (feature_max[index] - feature_min[index]); + // where [lower] = lower bound, [upper] = upper bound + + for (int y = 0; y < mat.rows; ++y) + { + auto row = mat.row(y); + auto row_min = range.row(0); + auto row_max = range.row(1); + + for (int x = 0; x < mat.cols; ++x) + row.at(x) = min + (max - min) * (row.at(x) - row_min.at(x) ) / (row_max.at(x) - row_min.at(x)); + } +} + +} // quality_utils +} // quality +} // cv +#endif \ No newline at end of file diff --git a/IPL/include/opencv/opencv2/quality/qualitybase.hpp b/IPL/include/opencv/opencv2/quality/qualitybase.hpp new file mode 100644 index 0000000..1f4887f --- /dev/null +++ b/IPL/include/opencv/opencv2/quality/qualitybase.hpp @@ -0,0 +1,63 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_QUALITYBASE_HPP +#define OPENCV_QUALITYBASE_HPP + +#include + +/** +@defgroup quality Image Quality Analysis (IQA) API +*/ + +namespace cv +{ +namespace quality +{ + +//! @addtogroup quality +//! @{ + +/************************************ Quality Base Class ************************************/ +class CV_EXPORTS_W QualityBase + : public virtual Algorithm +{ +public: + + /** @brief Destructor */ + virtual ~QualityBase() = default; + + /** + @brief Compute quality score per channel with the per-channel score in each element of the resulting cv::Scalar. See specific algorithm for interpreting result scores + @param img comparison image, or image to evalute for no-reference quality algorithms + */ + virtual CV_WRAP cv::Scalar compute( InputArray img ) = 0; + + /** @brief Returns output quality map that was generated during computation, if supported by the algorithm */ + virtual CV_WRAP void getQualityMap(OutputArray dst) const + { + if (!dst.needed() || _qualityMap.empty() ) + return; + dst.assign(_qualityMap); + } + + /** @brief Implements Algorithm::clear() */ + CV_WRAP void clear() CV_OVERRIDE { _qualityMap = _mat_type(); Algorithm::clear(); } + + /** @brief Implements Algorithm::empty() */ + CV_WRAP bool empty() const CV_OVERRIDE { return _qualityMap.empty(); } + +protected: + + /** @brief internal mat type default */ + using _mat_type = cv::UMat; + + /** @brief Output quality maps if generated by algorithm */ + _mat_type _qualityMap; + +}; // QualityBase +//! @} +} // quality +} // cv +#endif \ No newline at end of file diff --git a/IPL/include/opencv/opencv2/quality/qualitybrisque.hpp b/IPL/include/opencv/opencv2/quality/qualitybrisque.hpp new file mode 100644 index 0000000..9732f82 --- /dev/null +++ b/IPL/include/opencv/opencv2/quality/qualitybrisque.hpp @@ -0,0 +1,82 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_QUALITY_QUALITYBRISQUE_HPP +#define OPENCV_QUALITY_QUALITYBRISQUE_HPP + +#include "qualitybase.hpp" +#include "opencv2/ml.hpp" + +namespace cv +{ +namespace quality +{ + +/** +@brief BRISQUE (Blind/Referenceless Image Spatial Quality Evaluator) is a No Reference Image Quality Assessment (NR-IQA) algorithm. + +BRISQUE computes a score based on extracting Natural Scene Statistics (https://en.wikipedia.org/wiki/Scene_statistics) +and calculating feature vectors. See Mittal et al. @cite Mittal2 for original paper and original implementation @cite Mittal2_software . + +A trained model is provided in the /samples/ directory and is trained on the LIVE-R2 database @cite Sheikh as in the original implementation. +When evaluated against the TID2008 database @cite Ponomarenko , the SROCC is -0.8424 versus the SROCC of -0.8354 in the original implementation. +C++ code for the BRISQUE LIVE-R2 trainer and TID2008 evaluator are also provided in the /samples/ directory. +*/ +class CV_EXPORTS_W QualityBRISQUE : public QualityBase { +public: + + /** @brief Computes BRISQUE quality score for input image + @param img Image for which to compute quality + @returns cv::Scalar with the score in the first element. The score ranges from 0 (best quality) to 100 (worst quality) + */ + CV_WRAP cv::Scalar compute( InputArray img ) CV_OVERRIDE; + + /** + @brief Create an object which calculates quality + @param model_file_path cv::String which contains a path to the BRISQUE model data, eg. /path/to/brisque_model_live.yml + @param range_file_path cv::String which contains a path to the BRISQUE range data, eg. /path/to/brisque_range_live.yml + */ + CV_WRAP static Ptr create( const cv::String& model_file_path, const cv::String& range_file_path ); + + /** + @brief Create an object which calculates quality + @param model cv::Ptr which contains a loaded BRISQUE model + @param range cv::Mat which contains BRISQUE range data + */ + CV_WRAP static Ptr create( const cv::Ptr& model, const cv::Mat& range ); + + /** + @brief static method for computing quality + @param img image for which to compute quality + @param model_file_path cv::String which contains a path to the BRISQUE model data, eg. /path/to/brisque_model_live.yml + @param range_file_path cv::String which contains a path to the BRISQUE range data, eg. /path/to/brisque_range_live.yml + @returns cv::Scalar with the score in the first element. The score ranges from 0 (best quality) to 100 (worst quality) + */ + CV_WRAP static cv::Scalar compute( InputArray img, const cv::String& model_file_path, const cv::String& range_file_path ); + + /** + @brief static method for computing image features used by the BRISQUE algorithm + @param img image (BGR(A) or grayscale) for which to compute features + @param features output row vector of features to cv::Mat or cv::UMat + */ + CV_WRAP static void computeFeatures(InputArray img, OutputArray features); + +protected: + + cv::Ptr _model = nullptr; + cv::Mat _range; + + /** @brief Internal constructor */ + QualityBRISQUE( const cv::String& model_file_path, const cv::String& range_file_path ); + + /** @brief Internal constructor */ + QualityBRISQUE(const cv::Ptr& model, const cv::Mat& range ) + : _model{ model } + , _range{ range } + {} + +}; // QualityBRISQUE +} // quality +} // cv +#endif diff --git a/IPL/include/opencv/opencv2/quality/qualitygmsd.hpp b/IPL/include/opencv/opencv2/quality/qualitygmsd.hpp new file mode 100644 index 0000000..3a9cd0b --- /dev/null +++ b/IPL/include/opencv/opencv2/quality/qualitygmsd.hpp @@ -0,0 +1,92 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_QUALITY_QUALITYGMSD_HPP +#define OPENCV_QUALITY_QUALITYGMSD_HPP + +#include "qualitybase.hpp" + +namespace cv +{ +namespace quality +{ + +/** +@brief Full reference GMSD algorithm +http://www4.comp.polyu.edu.hk/~cslzhang/IQA/GMSD/GMSD.htm +*/ +class CV_EXPORTS_W QualityGMSD + : public QualityBase { +public: + + /** + @brief Compute GMSD + @param cmp comparison image + @returns cv::Scalar with per-channel quality value. Values range from 0 (worst) to 1 (best) + */ + CV_WRAP cv::Scalar compute( InputArray cmp ) CV_OVERRIDE; + + /** @brief Implements Algorithm::empty() */ + CV_WRAP bool empty() const CV_OVERRIDE { return _refImgData.empty() && QualityBase::empty(); } + + /** @brief Implements Algorithm::clear() */ + CV_WRAP void clear() CV_OVERRIDE { _refImgData = _mat_data(); QualityBase::clear(); } + + /** + @brief Create an object which calculates image quality + @param ref reference image + */ + CV_WRAP static Ptr create( InputArray ref ); + + /** + @brief static method for computing quality + @param ref reference image + @param cmp comparison image + @param qualityMap output quality map, or cv::noArray() + @returns cv::Scalar with per-channel quality value. Values range from 0 (worst) to 1 (best) + */ + CV_WRAP static cv::Scalar compute( InputArray ref, InputArray cmp, OutputArray qualityMap ); + +protected: + + // holds computed values for a mat + struct _mat_data + { + // internal mat type + using mat_type = QualityBase::_mat_type; + + mat_type + gradient_map + , gradient_map_squared + ; + + // allow default construction + _mat_data() = default; + + // construct from mat_type + _mat_data(const mat_type&); + + // construct from inputarray + _mat_data(InputArray); + + // returns flag if empty + bool empty() const { return this->gradient_map.empty() && this->gradient_map_squared.empty(); } + + // compute for a single frame + static std::pair compute(const _mat_data& lhs, const _mat_data& rhs); + + }; // mat_data + + /** @brief Reference image data */ + _mat_data _refImgData; + + // internal constructor + QualityGMSD(_mat_data refImgData) + : _refImgData(std::move(refImgData)) + {} + +}; // QualityGMSD +} // quality +} // cv +#endif \ No newline at end of file diff --git a/IPL/include/opencv/opencv2/quality/qualitymse.hpp b/IPL/include/opencv/opencv2/quality/qualitymse.hpp new file mode 100644 index 0000000..7763a58 --- /dev/null +++ b/IPL/include/opencv/opencv2/quality/qualitymse.hpp @@ -0,0 +1,64 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_QUALITY_QUALITYMSE_HPP +#define OPENCV_QUALITY_QUALITYMSE_HPP + +#include "qualitybase.hpp" + +namespace cv +{ +namespace quality +{ + +/** +@brief Full reference mean square error algorithm https://en.wikipedia.org/wiki/Mean_squared_error +*/ +class CV_EXPORTS_W QualityMSE : public QualityBase { +public: + + /** @brief Computes MSE for reference images supplied in class constructor and provided comparison images + @param cmpImgs Comparison image(s) + @returns cv::Scalar with per-channel quality values. Values range from 0 (best) to potentially max float (worst) + */ + CV_WRAP cv::Scalar compute( InputArrayOfArrays cmpImgs ) CV_OVERRIDE; + + /** @brief Implements Algorithm::empty() */ + CV_WRAP bool empty() const CV_OVERRIDE { return _ref.empty() && QualityBase::empty(); } + + /** @brief Implements Algorithm::clear() */ + CV_WRAP void clear() CV_OVERRIDE { _ref = _mat_type(); QualityBase::clear(); } + + /** + @brief Create an object which calculates quality + @param ref input image to use as the reference for comparison + */ + CV_WRAP static Ptr create(InputArray ref); + + /** + @brief static method for computing quality + @param ref reference image + @param cmp comparison image= + @param qualityMap output quality map, or cv::noArray() + @returns cv::Scalar with per-channel quality values. Values range from 0 (best) to max float (worst) + */ + CV_WRAP static cv::Scalar compute( InputArray ref, InputArray cmp, OutputArray qualityMap ); + +protected: + + /** @brief Reference image, converted to internal mat type */ + QualityBase::_mat_type _ref; + + /** + @brief Constructor + @param ref reference image, converted to internal type + */ + QualityMSE(QualityBase::_mat_type ref) + : _ref(std::move(ref)) + {} + +}; // QualityMSE +} // quality +} // cv +#endif \ No newline at end of file diff --git a/IPL/include/opencv/opencv2/quality/qualitypsnr.hpp b/IPL/include/opencv/opencv2/quality/qualitypsnr.hpp new file mode 100644 index 0000000..59b7325 --- /dev/null +++ b/IPL/include/opencv/opencv2/quality/qualitypsnr.hpp @@ -0,0 +1,120 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_QUALITY_QUALITYPSNR_HPP +#define OPENCV_QUALITY_QUALITYPSNR_HPP + +#include // numeric_limits +#include "qualitybase.hpp" +#include "qualitymse.hpp" + +namespace cv +{ +namespace quality +{ + +/** +@brief Full reference peak signal to noise ratio (PSNR) algorithm https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio +*/ +class CV_EXPORTS_W QualityPSNR + : public QualityBase { + +public: + + /** @brief Default maximum pixel value */ +#if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/) + static constexpr double MAX_PIXEL_VALUE_DEFAULT = 255.; +#else + // support MSVS 2013 + static const int MAX_PIXEL_VALUE_DEFAULT = 255; +#endif + + /** + @brief Create an object which calculates quality + @param ref input image to use as the source for comparison + @param maxPixelValue maximum per-channel value for any individual pixel; eg 255 for uint8 image + */ + CV_WRAP static Ptr create( InputArray ref, double maxPixelValue = QualityPSNR::MAX_PIXEL_VALUE_DEFAULT ) + { + return Ptr(new QualityPSNR(QualityMSE::create(ref), maxPixelValue)); + } + + /** + @brief Compute the PSNR + @param cmp Comparison image + @returns Per-channel PSNR value, or std::numeric_limits::infinity() if the MSE between the two images == 0 + */ + CV_WRAP cv::Scalar compute( InputArray cmp ) CV_OVERRIDE + { + auto result = _qualityMSE->compute( cmp ); + _qualityMSE->getQualityMap(_qualityMap); // copy from internal obj to this obj + return _mse_to_psnr( + result + , _maxPixelValue + ); + } + + /** @brief Implements Algorithm::empty() */ + CV_WRAP bool empty() const CV_OVERRIDE { return _qualityMSE->empty() && QualityBase::empty(); } + + /** @brief Implements Algorithm::clear() */ + CV_WRAP void clear() CV_OVERRIDE { _qualityMSE->clear(); QualityBase::clear(); } + + /** + @brief static method for computing quality + @param ref reference image + @param cmp comparison image + @param qualityMap output quality map, or cv::noArray() + @param maxPixelValue maximum per-channel value for any individual pixel; eg 255 for uint8 image + @returns PSNR value, or std::numeric_limits::infinity() if the MSE between the two images == 0 + */ + CV_WRAP static cv::Scalar compute( InputArray ref, InputArray cmp, OutputArray qualityMap, double maxPixelValue = QualityPSNR::MAX_PIXEL_VALUE_DEFAULT) + { + return _mse_to_psnr( + QualityMSE::compute(ref, cmp, qualityMap) + , maxPixelValue + ); + } + + /** @brief return the maximum pixel value used for PSNR computation */ + CV_WRAP double getMaxPixelValue() const { return _maxPixelValue; } + + /** + @brief sets the maximum pixel value used for PSNR computation + @param val Maximum pixel value + */ + CV_WRAP void setMaxPixelValue(double val) { this->_maxPixelValue = val; } + +protected: + + Ptr _qualityMSE; + double _maxPixelValue = QualityPSNR::MAX_PIXEL_VALUE_DEFAULT; + + /** @brief Constructor */ + QualityPSNR( Ptr qualityMSE, double maxPixelValue ) + : _qualityMSE(std::move(qualityMSE)) + , _maxPixelValue(maxPixelValue) + {} + + // convert mse to psnr + static double _mse_to_psnr(double mse, double max_pixel_value) + { + return (mse == 0.) + ? std::numeric_limits::infinity() + : 10. * std::log10((max_pixel_value * max_pixel_value) / mse) + ; + } + + // convert scalar of mses to psnrs + static cv::Scalar _mse_to_psnr(cv::Scalar mse, double max_pixel_value) + { + for (int i = 0; i < mse.rows; ++i) + mse(i) = _mse_to_psnr(mse(i), max_pixel_value); + return mse; + } + +}; // QualityPSNR +} // quality +} // cv +#endif \ No newline at end of file diff --git a/IPL/include/opencv/opencv2/quality/qualityssim.hpp b/IPL/include/opencv/opencv2/quality/qualityssim.hpp new file mode 100644 index 0000000..edbd3ae --- /dev/null +++ b/IPL/include/opencv/opencv2/quality/qualityssim.hpp @@ -0,0 +1,97 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_QUALITY_QUALITYSSIM_HPP +#define OPENCV_QUALITY_QUALITYSSIM_HPP + +#include "qualitybase.hpp" + +namespace cv +{ +namespace quality +{ + +/** +@brief Full reference structural similarity algorithm https://en.wikipedia.org/wiki/Structural_similarity +*/ +class CV_EXPORTS_W QualitySSIM + : public QualityBase { +public: + + /** + @brief Computes SSIM + @param cmp Comparison image + @returns cv::Scalar with per-channel quality values. Values range from 0 (worst) to 1 (best) + */ + CV_WRAP cv::Scalar compute( InputArray cmp ) CV_OVERRIDE; + + /** @brief Implements Algorithm::empty() */ + CV_WRAP bool empty() const CV_OVERRIDE { return _refImgData.empty() && QualityBase::empty(); } + + /** @brief Implements Algorithm::clear() */ + CV_WRAP void clear() CV_OVERRIDE { _refImgData = _mat_data(); QualityBase::clear(); } + + /** + @brief Create an object which calculates quality + @param ref input image to use as the reference image for comparison + */ + CV_WRAP static Ptr create( InputArray ref ); + + /** + @brief static method for computing quality + @param ref reference image + @param cmp comparison image + @param qualityMap output quality map, or cv::noArray() + @returns cv::Scalar with per-channel quality values. Values range from 0 (worst) to 1 (best) + */ + CV_WRAP static cv::Scalar compute( InputArray ref, InputArray cmp, OutputArray qualityMap ); + +protected: + + // holds computed values for a mat + struct _mat_data + { + // internal mat type + using mat_type = QualityBase::_mat_type; + + mat_type + I + , I_2 + , mu + , mu_2 + , sigma_2 + ; + + // allow default construction + _mat_data() = default; + + // construct from mat_type + _mat_data(const mat_type&); + + // construct from inputarray + _mat_data(InputArray); + + // return flag if this is empty + bool empty() const { return I.empty() && I_2.empty() && mu.empty() && mu_2.empty() && sigma_2.empty(); } + + // computes ssim and quality map for single frame + static std::pair compute(const _mat_data& lhs, const _mat_data& rhs); + + }; // mat_data + + /** @brief Reference image data */ + _mat_data _refImgData; + + /** + @brief Constructor + @param refImgData reference image, converted to internal type + */ + QualitySSIM( _mat_data refImgData ) + : _refImgData( std::move(refImgData) ) + {} + +}; // QualitySSIM +} // quality +} // cv +#endif \ No newline at end of file diff --git a/IPL/include/opencv/opencv2/rapid.hpp b/IPL/include/opencv/opencv2/rapid.hpp new file mode 100644 index 0000000..829bd86 --- /dev/null +++ b/IPL/include/opencv/opencv2/rapid.hpp @@ -0,0 +1,126 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_RAPID_HPP_ +#define OPENCV_RAPID_HPP_ + +#include +#include + +/** +@defgroup rapid silhouette based 3D object tracking + +implements "RAPID-a video rate object tracker" @cite harris1990rapid with the dynamic control point extraction of @cite drummond2002real +*/ + +namespace cv +{ +namespace rapid +{ +//! @addtogroup rapid +//! @{ + +/** + * Debug draw markers of matched correspondences onto a lineBundle + * @param bundle the lineBundle + * @param srcLocations the according source locations + * @param newLocations matched source locations + * @param colors colors for the markers. Defaults to white. + */ +CV_EXPORTS_W void drawCorrespondencies(InputOutputArray bundle, InputArray srcLocations, + InputArray newLocations, InputArray colors = noArray()); +/** + * Debug draw search lines onto an image + * @param img the output image + * @param locations the source locations of a line bundle + * @param color the line color + */ +CV_EXPORTS_W void drawSearchLines(InputOutputArray img, InputArray locations, const Scalar& color); + +/** + * Draw a wireframe of a triangle mesh + * @param img the output image + * @param pts2d the 2d points obtained by @ref projectPoints + * @param tris triangle face connectivity + * @param color line color + * @param type line type. See @ref LineTypes. + * @param cullBackface enable back-face culling based on CCW order + */ +CV_EXPORTS_W void drawWireframe(InputOutputArray img, InputArray pts2d, InputArray tris, + const Scalar& color, int type = LINE_8, bool cullBackface = false); +/** + * Extract control points from the projected silhouette of a mesh + * + * see @cite drummond2002real Sec 2.1, Step b + * @param num number of control points + * @param len search radius (used to restrict the ROI) + * @param pts3d the 3D points of the mesh + * @param rvec rotation between mesh and camera + * @param tvec translation between mesh and camera + * @param K camera intrinsic + * @param imsize size of the video frame + * @param tris triangle face connectivity + * @param ctl2d the 2D locations of the control points + * @param ctl3d matching 3D points of the mesh + */ +CV_EXPORTS_W void extractControlPoints(int num, int len, InputArray pts3d, InputArray rvec, InputArray tvec, + InputArray K, const Size& imsize, InputArray tris, OutputArray ctl2d, + OutputArray ctl3d); +/** + * Extract the line bundle from an image + * @param len the search radius. The bundle will have `2*len + 1` columns. + * @param ctl2d the search lines will be centered at this points and orthogonal to the contour defined by + * them. The bundle will have as many rows. + * @param img the image to read the pixel intensities values from + * @param bundle line bundle image with size `ctl2d.rows() x (2 * len + 1)` and the same type as @p img + * @param srcLocations the source pixel locations of @p bundle in @p img as CV_16SC2 + */ +CV_EXPORTS_W void extractLineBundle(int len, InputArray ctl2d, InputArray img, OutputArray bundle, + OutputArray srcLocations); + +/** + * Find corresponding image locations by searching for a maximal sobel edge along the search line (a single + * row in the bundle) + * @param bundle the line bundle + * @param srcLocations the according source image location + * @param newLocations image locations with maximal edge along the search line + * @param response the sobel response for the selected point + */ +CV_EXPORTS_W void findCorrespondencies(InputArray bundle, InputArray srcLocations, OutputArray newLocations, + OutputArray response = noArray()); + +/** + * Filter corresponding 2d and 3d points based on mask + * @param pts2d 2d points + * @param pts3d 3d points + * @param mask mask containing non-zero values for the elements to be retained + */ +CV_EXPORTS_W void filterCorrespondencies(InputOutputArray pts2d, InputOutputArray pts3d, InputArray mask); + +/** + * High level function to execute a single rapid @cite harris1990rapid iteration + * + * 1. @ref extractControlPoints + * 2. @ref extractLineBundle + * 3. @ref findCorrespondencies + * 4. @ref filterCorrespondencies + * 5. @ref solvePnPRefineLM + * + * @param img the video frame + * @param num number of search lines + * @param len search line radius + * @param pts3d the 3D points of the mesh + * @param tris triangle face connectivity + * @param K camera matrix + * @param rvec rotation between mesh and camera. Input values are used as an initial solution. + * @param tvec translation between mesh and camera. Input values are used as an initial solution. + * @return ratio of search lines that could be extracted and matched + */ +CV_EXPORTS_W float rapid(InputArray img, int num, int len, InputArray pts3d, InputArray tris, InputArray K, + InputOutputArray rvec, InputOutputArray tvec); +//! @} +} /* namespace rapid */ +} /* namespace cv */ + +#endif /* OPENCV_RAPID_HPP_ */ diff --git a/IPL/include/opencv/opencv2/reg/map.hpp b/IPL/include/opencv/opencv2/reg/map.hpp index 26b29e3..a885a28 100644 --- a/IPL/include/opencv/opencv2/reg/map.hpp +++ b/IPL/include/opencv/opencv2/reg/map.hpp @@ -121,13 +121,13 @@ namespace reg { The class is only used to define the common interface for any possible map. */ -class CV_EXPORTS Map +class CV_EXPORTS_W Map { public: /*! * Virtual destructor */ - virtual ~Map(void); + virtual ~Map(); /*! * Warps image to a new coordinate frame. The calculation is img2(x)=img1(T^{-1}(x)), as we @@ -136,7 +136,7 @@ class CV_EXPORTS Map * \param[in] img1 Original image * \param[out] img2 Warped image */ - virtual void warp(const cv::Mat& img1, cv::Mat& img2) const; + CV_WRAP virtual void warp(InputArray img1, OutputArray img2) const; /*! * Warps image to a new coordinate frame. The calculation is img2(x)=img1(T(x)), so in fact @@ -145,27 +145,27 @@ class CV_EXPORTS Map * \param[in] img1 Original image * \param[out] img2 Warped image */ - virtual void inverseWarp(const cv::Mat& img1, cv::Mat& img2) const = 0; + CV_WRAP virtual void inverseWarp(InputArray img1, OutputArray img2) const = 0; /*! * Calculates the inverse map * \return Inverse map */ - virtual cv::Ptr inverseMap(void) const = 0; + CV_WRAP virtual cv::Ptr inverseMap() const = 0; /*! * Changes the map composing the current transformation with the one provided in the call. * The order is first the current transformation, then the input argument. * \param[in] map Transformation to compose with. */ - virtual void compose(const Map& map) = 0; + CV_WRAP virtual void compose(cv::Ptr map) = 0; /*! * Scales the map by a given factor as if the coordinates system is expanded/compressed * by that factor. * \param[in] factor Expansion if bigger than one, compression if smaller than one */ - virtual void scale(double factor) = 0; + CV_WRAP virtual void scale(double factor) = 0; }; //! @} diff --git a/IPL/include/opencv/opencv2/reg/mapaffine.hpp b/IPL/include/opencv/opencv2/reg/mapaffine.hpp index 1c91326..52ab8d6 100644 --- a/IPL/include/opencv/opencv2/reg/mapaffine.hpp +++ b/IPL/include/opencv/opencv2/reg/mapaffine.hpp @@ -49,33 +49,33 @@ namespace reg { /*! * Defines an affine transformation */ -class CV_EXPORTS MapAffine : public Map +class CV_EXPORTS_W MapAffine : public Map { public: /*! * Default constructor builds an identity map */ - MapAffine(void); + CV_WRAP MapAffine(); /*! * Constructor providing explicit values * \param[in] linTr Linear part of the affine transformation * \param[in] shift Displacement part of the affine transformation */ - MapAffine(const cv::Matx& linTr, const cv::Vec& shift); + CV_WRAP MapAffine(InputArray linTr, InputArray shift); /*! * Destructor */ - ~MapAffine(void); + ~MapAffine(); - void inverseWarp(const cv::Mat& img1, cv::Mat& img2) const; + CV_WRAP void inverseWarp(InputArray img1, OutputArray img2) const CV_OVERRIDE; - cv::Ptr inverseMap(void) const; + CV_WRAP cv::Ptr inverseMap() const CV_OVERRIDE; - void compose(const Map& map); + CV_WRAP void compose(cv::Ptr map) CV_OVERRIDE; - void scale(double factor); + CV_WRAP void scale(double factor) CV_OVERRIDE; /*! * Return linear part of the affine transformation @@ -85,6 +85,10 @@ class CV_EXPORTS MapAffine : public Map return linTr_; } + CV_WRAP void getLinTr(OutputArray linTr) const { + Mat(linTr_).copyTo(linTr); + } + /*! * Return displacement part of the affine transformation * \return Displacement part of the affine transformation @@ -93,6 +97,10 @@ class CV_EXPORTS MapAffine : public Map return shift_; } + CV_WRAP void getShift(OutputArray shift) const { + Mat(shift_).copyTo(shift); + } + private: cv::Matx linTr_; cv::Vec shift_; diff --git a/IPL/include/opencv/opencv2/reg/mapper.hpp b/IPL/include/opencv/opencv2/reg/mapper.hpp index 8abadd1..2c2862d 100644 --- a/IPL/include/opencv/opencv2/reg/mapper.hpp +++ b/IPL/include/opencv/opencv2/reg/mapper.hpp @@ -47,11 +47,11 @@ namespace reg { //! @addtogroup reg //! @{ -/** @brief Base class for modelling an algorithm for calculating a +/** @brief Base class for modelling an algorithm for calculating a map The class is only used to define the common interface for any possible mapping algorithm. */ -class CV_EXPORTS Mapper +class CV_EXPORTS_W Mapper { public: virtual ~Mapper(void) {} @@ -60,16 +60,16 @@ class CV_EXPORTS Mapper * Calculate mapping between two images * \param[in] img1 Reference image * \param[in] img2 Warped image - * \param[in,out] res Map from img1 to img2, stored in a smart pointer. If present as input, - * it is an initial rough estimation that the mapper will try to refine. + * \param[in] If present, it is an initial rough estimation that the mapper will try to refine. + * \return Map from img1 to img2, stored in a smart pointer. */ - virtual void calculate(const cv::Mat& img1, const cv::Mat& img2, cv::Ptr& res) const = 0; + CV_WRAP virtual cv::Ptr calculate(InputArray img1, InputArray img2, cv::Ptr init = cv::Ptr()) const = 0; /* * Returns a map compatible with the Mapper class * \return Pointer to identity Map */ - virtual cv::Ptr getMap(void) const = 0; + CV_WRAP virtual cv::Ptr getMap() const = 0; protected: /* diff --git a/IPL/include/opencv/opencv2/reg/mappergradaffine.hpp b/IPL/include/opencv/opencv2/reg/mappergradaffine.hpp index 08d5397..49b0bc7 100644 --- a/IPL/include/opencv/opencv2/reg/mappergradaffine.hpp +++ b/IPL/include/opencv/opencv2/reg/mappergradaffine.hpp @@ -49,15 +49,15 @@ namespace reg { /*! * Mapper for affine motion */ -class CV_EXPORTS MapperGradAffine: public Mapper +class CV_EXPORTS_W MapperGradAffine: public Mapper { public: - MapperGradAffine(void); + CV_WRAP MapperGradAffine(); ~MapperGradAffine(void); - virtual void calculate(const cv::Mat& img1, const cv::Mat& img2, cv::Ptr& res) const; + CV_WRAP virtual cv::Ptr calculate(InputArray img1, InputArray img2, cv::Ptr init = cv::Ptr()) const CV_OVERRIDE; - cv::Ptr getMap(void) const; + CV_WRAP cv::Ptr getMap() const CV_OVERRIDE; }; //! @} diff --git a/IPL/include/opencv/opencv2/reg/mappergradeuclid.hpp b/IPL/include/opencv/opencv2/reg/mappergradeuclid.hpp index 29c49cb..4f6c5c8 100644 --- a/IPL/include/opencv/opencv2/reg/mappergradeuclid.hpp +++ b/IPL/include/opencv/opencv2/reg/mappergradeuclid.hpp @@ -49,15 +49,15 @@ namespace reg { /*! * Mapper for euclidean motion: rotation plus shift */ -class CV_EXPORTS MapperGradEuclid: public Mapper +class CV_EXPORTS_W MapperGradEuclid: public Mapper { public: - MapperGradEuclid(void); - ~MapperGradEuclid(void); + CV_WRAP MapperGradEuclid(); + ~MapperGradEuclid(); - virtual void calculate(const cv::Mat& img1, const cv::Mat& img2, cv::Ptr& res) const; + CV_WRAP virtual cv::Ptr calculate(InputArray img1, InputArray img2, cv::Ptr init = cv::Ptr()) const CV_OVERRIDE; - cv::Ptr getMap(void) const; + CV_WRAP cv::Ptr getMap() const CV_OVERRIDE; }; //! @} diff --git a/IPL/include/opencv/opencv2/reg/mappergradproj.hpp b/IPL/include/opencv/opencv2/reg/mappergradproj.hpp index f1721e8..1e01c71 100644 --- a/IPL/include/opencv/opencv2/reg/mappergradproj.hpp +++ b/IPL/include/opencv/opencv2/reg/mappergradproj.hpp @@ -49,15 +49,15 @@ namespace reg { /*! * Gradient mapper for a projective transformation */ -class CV_EXPORTS MapperGradProj: public Mapper +class CV_EXPORTS_W MapperGradProj: public Mapper { public: - MapperGradProj(void); - ~MapperGradProj(void); + CV_WRAP MapperGradProj(); + ~MapperGradProj(); - virtual void calculate(const cv::Mat& img1, const cv::Mat& img2, cv::Ptr& res) const; + CV_WRAP virtual cv::Ptr calculate(InputArray img1, InputArray img2, cv::Ptr init = cv::Ptr()) const CV_OVERRIDE; - cv::Ptr getMap(void) const; + CV_WRAP cv::Ptr getMap() const CV_OVERRIDE; }; //! @} diff --git a/IPL/include/opencv/opencv2/reg/mappergradshift.hpp b/IPL/include/opencv/opencv2/reg/mappergradshift.hpp index a9f75b3..cd812d9 100644 --- a/IPL/include/opencv/opencv2/reg/mappergradshift.hpp +++ b/IPL/include/opencv/opencv2/reg/mappergradshift.hpp @@ -49,15 +49,15 @@ namespace reg { /*! * Gradient mapper for a translation */ -class CV_EXPORTS MapperGradShift: public Mapper +class CV_EXPORTS_W MapperGradShift: public Mapper { public: - MapperGradShift(void); - virtual ~MapperGradShift(void); + CV_WRAP MapperGradShift(); + virtual ~MapperGradShift(); - virtual void calculate(const cv::Mat& img1, const cv::Mat& img2, cv::Ptr& res) const; + CV_WRAP virtual cv::Ptr calculate(InputArray img1, InputArray img2, cv::Ptr init = cv::Ptr()) const CV_OVERRIDE; - cv::Ptr getMap(void) const; + CV_WRAP cv::Ptr getMap() const CV_OVERRIDE; }; //! @} diff --git a/IPL/include/opencv/opencv2/reg/mappergradsimilar.hpp b/IPL/include/opencv/opencv2/reg/mappergradsimilar.hpp index ea45ab9..07d64d9 100644 --- a/IPL/include/opencv/opencv2/reg/mappergradsimilar.hpp +++ b/IPL/include/opencv/opencv2/reg/mappergradsimilar.hpp @@ -49,15 +49,15 @@ namespace reg { /*! * Calculates a similarity transformation between to images (scale, rotation, and shift) */ -class CV_EXPORTS MapperGradSimilar: public Mapper +class CV_EXPORTS_W MapperGradSimilar: public Mapper { public: - MapperGradSimilar(void); - ~MapperGradSimilar(void); + CV_WRAP MapperGradSimilar(); + ~MapperGradSimilar(); - virtual void calculate(const cv::Mat& img1, const cv::Mat& img2, cv::Ptr& res) const; + CV_WRAP virtual cv::Ptr calculate(InputArray img1, InputArray img2, cv::Ptr init = cv::Ptr()) const CV_OVERRIDE; - cv::Ptr getMap(void) const; + CV_WRAP cv::Ptr getMap() const CV_OVERRIDE; }; //! @} diff --git a/IPL/include/opencv/opencv2/reg/mapperpyramid.hpp b/IPL/include/opencv/opencv2/reg/mapperpyramid.hpp index 33440bd..6106f49 100644 --- a/IPL/include/opencv/opencv2/reg/mapperpyramid.hpp +++ b/IPL/include/opencv/opencv2/reg/mapperpyramid.hpp @@ -39,7 +39,9 @@ #define MAPPERPYRAMID_H_ #include "mapper.hpp" - +#include "mapaffine.hpp" +#include "mapprojec.hpp" +#include "mapshift.hpp" namespace cv { namespace reg { @@ -50,27 +52,52 @@ namespace reg { /*! * Calculates a map using a gaussian pyramid */ -class CV_EXPORTS MapperPyramid: public Mapper +class CV_EXPORTS_W MapperPyramid: public Mapper { public: /* * Constructor * \param[in] baseMapper Base mapper used for the refinements */ - MapperPyramid(const Mapper& baseMapper); + CV_WRAP MapperPyramid(Ptr baseMapper); - void calculate(const cv::Mat& img1, const cv::Mat& img2, cv::Ptr& res) const; + CV_WRAP virtual cv::Ptr calculate(InputArray img1, InputArray img2, cv::Ptr init = cv::Ptr()) const CV_OVERRIDE; - cv::Ptr getMap(void) const; + CV_WRAP cv::Ptr getMap() const CV_OVERRIDE; - unsigned numLev_; /*!< Number of levels of the pyramid */ - unsigned numIterPerScale_; /*!< Number of iterations at a given scale of the pyramid */ + CV_PROP_RW int numLev_; /*!< Number of levels of the pyramid */ + CV_PROP_RW int numIterPerScale_; /*!< Number of iterations at a given scale of the pyramid */ private: MapperPyramid& operator=(const MapperPyramid&); const Mapper& baseMapper_; /*!< Mapper used in inner level */ }; +/*! + * Converts a pointer to a Map returned by MapperPyramid::calculate into the specified Map pointer type + */ +class CV_EXPORTS_W MapTypeCaster +{ +public: + CV_WRAP static Ptr toAffine(Ptr sourceMap) + { + MapAffine& affineMap = dynamic_cast(*sourceMap); + return Ptr(new MapAffine(affineMap)); + } + + CV_WRAP static Ptr toShift(Ptr sourceMap) + { + MapShift& shiftMap = dynamic_cast(*sourceMap); + return Ptr(new MapShift(shiftMap)); + } + + CV_WRAP static Ptr toProjec(Ptr sourceMap) + { + MapProjec& projecMap = dynamic_cast(*sourceMap); + return Ptr(new MapProjec(projecMap)); + } +}; + //! @} }} // namespace cv::reg diff --git a/IPL/include/opencv/opencv2/reg/mapprojec.hpp b/IPL/include/opencv/opencv2/reg/mapprojec.hpp index 57ef146..6d1e565 100644 --- a/IPL/include/opencv/opencv2/reg/mapprojec.hpp +++ b/IPL/include/opencv/opencv2/reg/mapprojec.hpp @@ -50,32 +50,32 @@ namespace reg { /*! * Defines an transformation that consists on a projective transformation */ -class CV_EXPORTS MapProjec : public Map +class CV_EXPORTS_W MapProjec : public Map { public: /*! * Default constructor builds an identity map */ - MapProjec(void); + CV_WRAP MapProjec(); /*! * Constructor providing explicit values * \param[in] projTr Projective transformation */ - MapProjec(const cv::Matx& projTr); + CV_WRAP MapProjec(InputArray projTr); /*! * Destructor */ - ~MapProjec(void); + ~MapProjec(); - void inverseWarp(const cv::Mat& img1, cv::Mat& img2) const; + CV_WRAP void inverseWarp(InputArray img1, OutputArray img2) const CV_OVERRIDE; - cv::Ptr inverseMap(void) const; + CV_WRAP cv::Ptr inverseMap() const CV_OVERRIDE; - void compose(const Map& map); + CV_WRAP void compose(cv::Ptr map) CV_OVERRIDE; - void scale(double factor); + CV_WRAP void scale(double factor) CV_OVERRIDE; /*! * Returns projection matrix @@ -85,10 +85,14 @@ class CV_EXPORTS MapProjec : public Map return projTr_; } + CV_WRAP void getProjTr(OutputArray projTr) const { + Mat(projTr_).copyTo(projTr); + } + /*! * Normalizes object's homography */ - void normalize(void) { + CV_WRAP void normalize() { double z = 1./projTr_(2, 2); for(size_t v_i = 0; v_i < sizeof(projTr_.val)/sizeof(projTr_.val[0]); ++v_i) projTr_.val[v_i] *= z; diff --git a/IPL/include/opencv/opencv2/reg/mapshift.hpp b/IPL/include/opencv/opencv2/reg/mapshift.hpp index e5f54a4..ada9545 100644 --- a/IPL/include/opencv/opencv2/reg/mapshift.hpp +++ b/IPL/include/opencv/opencv2/reg/mapshift.hpp @@ -50,32 +50,33 @@ namespace reg { /*! * Defines an transformation that consists on a simple displacement */ -class CV_EXPORTS MapShift : public Map +class CV_EXPORTS_W MapShift : public Map { public: /*! * Default constructor builds an identity map */ - MapShift(void); + CV_WRAP MapShift(); /*! * Constructor providing explicit values * \param[in] shift Displacement */ - MapShift(const cv::Vec& shift); + + CV_WRAP MapShift(InputArray shift); /*! * Destructor */ - ~MapShift(void); + ~MapShift(); - void inverseWarp(const cv::Mat& img1, cv::Mat& img2) const; + CV_WRAP void inverseWarp(InputArray img1, OutputArray img2) const CV_OVERRIDE; - cv::Ptr inverseMap(void) const; + CV_WRAP cv::Ptr inverseMap() const CV_OVERRIDE; - void compose(const Map& map); + CV_WRAP void compose(cv::Ptr map) CV_OVERRIDE; - void scale(double factor); + CV_WRAP void scale(double factor) CV_OVERRIDE; /*! * Return displacement @@ -85,6 +86,10 @@ class CV_EXPORTS MapShift : public Map return shift_; } + CV_WRAP void getShift(OutputArray shift) const { + Mat(shift_).copyTo(shift); + } + private: cv::Vec shift_; /*< Displacement */ }; diff --git a/IPL/include/opencv/opencv2/rgbd.hpp b/IPL/include/opencv/opencv2/rgbd.hpp index b25bd3d..37b2927 100644 --- a/IPL/include/opencv/opencv2/rgbd.hpp +++ b/IPL/include/opencv/opencv2/rgbd.hpp @@ -1,1049 +1,23 @@ -/* - * Software License Agreement (BSD License) - * - * Copyright (c) 2009, Willow Garage, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * * Neither the name of Willow Garage, Inc. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - */ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +// This code is also subject to the license terms in the LICENSE_KinectFusion.md file found in this module's directory + +// This code is also subject to the license terms in the LICENSE_WillowGarage.md file found in this module's directory #ifndef __OPENCV_RGBD_HPP__ #define __OPENCV_RGBD_HPP__ -#ifdef __cplusplus +#include "opencv2/rgbd/linemod.hpp" +#include "opencv2/rgbd/depth.hpp" +#include "opencv2/rgbd/kinfu.hpp" +#include "opencv2/rgbd/dynafu.hpp" -#include -#include /** @defgroup rgbd RGB-Depth Processing */ -namespace cv -{ -namespace rgbd -{ - -//! @addtogroup rgbd -//! @{ - - /** Checks if the value is a valid depth. For CV_16U or CV_16S, the convention is to be invalid if it is - * a limit. For a float/double, we just check if it is a NaN - * @param depth the depth to check for validity - */ - CV_EXPORTS - inline bool - isValidDepth(const float & depth) - { - return !cvIsNaN(depth); - } - CV_EXPORTS - inline bool - isValidDepth(const double & depth) - { - return !cvIsNaN(depth); - } - CV_EXPORTS - inline bool - isValidDepth(const short int & depth) - { - return (depth != std::numeric_limits::min()) && (depth != std::numeric_limits::max()); - } - CV_EXPORTS - inline bool - isValidDepth(const unsigned short int & depth) - { - return (depth != std::numeric_limits::min()) - && (depth != std::numeric_limits::max()); - } - CV_EXPORTS - inline bool - isValidDepth(const int & depth) - { - return (depth != std::numeric_limits::min()) && (depth != std::numeric_limits::max()); - } - CV_EXPORTS - inline bool - isValidDepth(const unsigned int & depth) - { - return (depth != std::numeric_limits::min()) && (depth != std::numeric_limits::max()); - } - - /** Object that can compute the normals in an image. - * It is an object as it can cache data for speed efficiency - * The implemented methods are either: - * - FALS (the fastest) and SRI from - * ``Fast and Accurate Computation of Surface Normals from Range Images`` - * by H. Badino, D. Huber, Y. Park and T. Kanade - * - the normals with bilateral filtering on a depth image from - * ``Gradient Response Maps for Real-Time Detection of Texture-Less Objects`` - * by S. Hinterstoisser, C. Cagniart, S. Ilic, P. Sturm, N. Navab, P. Fua, and V. Lepetit - */ - class CV_EXPORTS RgbdNormals: public Algorithm - { - public: - enum RGBD_NORMALS_METHOD - { - RGBD_NORMALS_METHOD_FALS, RGBD_NORMALS_METHOD_LINEMOD, RGBD_NORMALS_METHOD_SRI - }; - - RgbdNormals() - : - rows_(0), - cols_(0), - depth_(0), - K_(Mat()), - window_size_(0), - method_(RGBD_NORMALS_METHOD_FALS), - rgbd_normals_impl_(0) - { - } - - /** Constructor - * @param rows the number of rows of the depth image normals will be computed on - * @param cols the number of cols of the depth image normals will be computed on - * @param depth the depth of the normals (only CV_32F or CV_64F) - * @param K the calibration matrix to use - * @param window_size the window size to compute the normals: can only be 1,3,5 or 7 - * @param method one of the methods to use: RGBD_NORMALS_METHOD_SRI, RGBD_NORMALS_METHOD_FALS - */ - RgbdNormals(int rows, int cols, int depth, InputArray K, int window_size = 5, int method = - RGBD_NORMALS_METHOD_FALS); - - ~RgbdNormals(); - - /** Given a set of 3d points in a depth image, compute the normals at each point. - * @param points a rows x cols x 3 matrix of CV_32F/CV64F or a rows x cols x 1 CV_U16S - * @param normals a rows x cols x 3 matrix - */ - void - operator()(InputArray points, OutputArray normals) const; - - /** Initializes some data that is cached for later computation - * If that function is not called, it will be called the first time normals are computed - */ - void - initialize() const; - - int getRows() const - { - return rows_; - } - void setRows(int val) - { - rows_ = val; - } - int getCols() const - { - return cols_; - } - void setCols(int val) - { - cols_ = val; - } - int getWindowSize() const - { - return window_size_; - } - void setWindowSize(int val) - { - window_size_ = val; - } - int getDepth() const - { - return depth_; - } - void setDepth(int val) - { - depth_ = val; - } - cv::Mat getK() const - { - return K_; - } - void setK(const cv::Mat &val) - { - K_ = val; - } - int getMethod() const - { - return method_; - } - void setMethod(int val) - { - method_ = val; - } - - protected: - void - initialize_normals_impl(int rows, int cols, int depth, const Mat & K, int window_size, int method) const; - - int rows_, cols_, depth_; - Mat K_; - int window_size_; - int method_; - mutable void* rgbd_normals_impl_; - }; - - /** Object that can clean a noisy depth image - */ - class CV_EXPORTS DepthCleaner: public Algorithm - { - public: - /** NIL method is from - * ``Modeling Kinect Sensor Noise for Improved 3d Reconstruction and Tracking`` - * by C. Nguyen, S. Izadi, D. Lovel - */ - enum DEPTH_CLEANER_METHOD - { - DEPTH_CLEANER_NIL - }; - - DepthCleaner() - : - depth_(0), - window_size_(0), - method_(DEPTH_CLEANER_NIL), - depth_cleaner_impl_(0) - { - } - - /** Constructor - * @param depth the depth of the normals (only CV_32F or CV_64F) - * @param window_size the window size to compute the normals: can only be 1,3,5 or 7 - * @param method one of the methods to use: RGBD_NORMALS_METHOD_SRI, RGBD_NORMALS_METHOD_FALS - */ - DepthCleaner(int depth, int window_size = 5, int method = DEPTH_CLEANER_NIL); - - ~DepthCleaner(); - - /** Given a set of 3d points in a depth image, compute the normals at each point. - * @param points a rows x cols x 3 matrix of CV_32F/CV64F or a rows x cols x 1 CV_U16S - * @param depth a rows x cols matrix of the cleaned up depth - */ - void - operator()(InputArray points, OutputArray depth) const; - - /** Initializes some data that is cached for later computation - * If that function is not called, it will be called the first time normals are computed - */ - void - initialize() const; - - int getWindowSize() const - { - return window_size_; - } - void setWindowSize(int val) - { - window_size_ = val; - } - int getDepth() const - { - return depth_; - } - void setDepth(int val) - { - depth_ = val; - } - int getMethod() const - { - return method_; - } - void setMethod(int val) - { - method_ = val; - } - - protected: - void - initialize_cleaner_impl() const; - - int depth_; - int window_size_; - int method_; - mutable void* depth_cleaner_impl_; - }; - - - /** Registers depth data to an external camera - * Registration is performed by creating a depth cloud, transforming the cloud by - * the rigid body transformation between the cameras, and then projecting the - * transformed points into the RGB camera. - * - * uv_rgb = K_rgb * [R | t] * z * inv(K_ir) * uv_ir - * - * Currently does not check for negative depth values. - * - * @param unregisteredCameraMatrix the camera matrix of the depth camera - * @param registeredCameraMatrix the camera matrix of the external camera - * @param registeredDistCoeffs the distortion coefficients of the external camera - * @param Rt the rigid body transform between the cameras. Transforms points from depth camera frame to external camera frame. - * @param unregisteredDepth the input depth data - * @param outputImagePlaneSize the image plane dimensions of the external camera (width, height) - * @param registeredDepth the result of transforming the depth into the external camera - * @param depthDilation whether or not the depth is dilated to avoid holes and occlusion errors (optional) - */ - CV_EXPORTS - void - registerDepth(InputArray unregisteredCameraMatrix, InputArray registeredCameraMatrix, InputArray registeredDistCoeffs, - InputArray Rt, InputArray unregisteredDepth, const Size& outputImagePlaneSize, - OutputArray registeredDepth, bool depthDilation=false); - - /** - * @param depth the depth image - * @param in_K - * @param in_points the list of xy coordinates - * @param points3d the resulting 3d points - */ - CV_EXPORTS - void - depthTo3dSparse(InputArray depth, InputArray in_K, InputArray in_points, OutputArray points3d); - - /** Converts a depth image to an organized set of 3d points. - * The coordinate system is x pointing left, y down and z away from the camera - * @param depth the depth image (if given as short int CV_U, it is assumed to be the depth in millimeters - * (as done with the Microsoft Kinect), otherwise, if given as CV_32F or CV_64F, it is assumed in meters) - * @param K The calibration matrix - * @param points3d the resulting 3d points. They are of depth the same as `depth` if it is CV_32F or CV_64F, and the - * depth of `K` if `depth` is of depth CV_U - * @param mask the mask of the points to consider (can be empty) - */ - CV_EXPORTS - void - depthTo3d(InputArray depth, InputArray K, OutputArray points3d, InputArray mask = noArray()); - - /** If the input image is of type CV_16UC1 (like the Kinect one), the image is converted to floats, divided - * by 1000 to get a depth in meters, and the values 0 are converted to std::numeric_limits::quiet_NaN() - * Otherwise, the image is simply converted to floats - * @param in the depth image (if given as short int CV_U, it is assumed to be the depth in millimeters - * (as done with the Microsoft Kinect), it is assumed in meters) - * @param depth the desired output depth (floats or double) - * @param out The rescaled float depth image - */ - CV_EXPORTS - void - rescaleDepth(InputArray in, int depth, OutputArray out); - - /** Object that can compute planes in an image - */ - class CV_EXPORTS RgbdPlane: public Algorithm - { - public: - enum RGBD_PLANE_METHOD - { - RGBD_PLANE_METHOD_DEFAULT - }; - - RgbdPlane(RGBD_PLANE_METHOD method = RGBD_PLANE_METHOD_DEFAULT) - : - method_(method), - block_size_(40), - min_size_(block_size_*block_size_), - threshold_(0.01), - sensor_error_a_(0), - sensor_error_b_(0), - sensor_error_c_(0) - { - } - - /** Find The planes in a depth image - * @param points3d the 3d points organized like the depth image: rows x cols with 3 channels - * @param normals the normals for every point in the depth image - * @param mask An image where each pixel is labeled with the plane it belongs to - * and 255 if it does not belong to any plane - * @param plane_coefficients the coefficients of the corresponding planes (a,b,c,d) such that ax+by+cz+d=0, norm(a,b,c)=1 - * and c < 0 (so that the normal points towards the camera) - */ - void - operator()(InputArray points3d, InputArray normals, OutputArray mask, - OutputArray plane_coefficients); - - /** Find The planes in a depth image but without doing a normal check, which is faster but less accurate - * @param points3d the 3d points organized like the depth image: rows x cols with 3 channels - * @param mask An image where each pixel is labeled with the plane it belongs to - * and 255 if it does not belong to any plane - * @param plane_coefficients the coefficients of the corresponding planes (a,b,c,d) such that ax+by+cz+d=0 - */ - void - operator()(InputArray points3d, OutputArray mask, OutputArray plane_coefficients); - - int getBlockSize() const - { - return block_size_; - } - void setBlockSize(int val) - { - block_size_ = val; - } - int getMinSize() const - { - return min_size_; - } - void setMinSize(int val) - { - min_size_ = val; - } - int getMethod() const - { - return method_; - } - void setMethod(int val) - { - method_ = val; - } - double getThreshold() const - { - return threshold_; - } - void setThreshold(double val) - { - threshold_ = val; - } - double getSensorErrorA() const - { - return sensor_error_a_; - } - void setSensorErrorA(double val) - { - sensor_error_a_ = val; - } - double getSensorErrorB() const - { - return sensor_error_b_; - } - void setSensorErrorB(double val) - { - sensor_error_b_ = val; - } - double getSensorErrorC() const - { - return sensor_error_c_; - } - void setSensorErrorC(double val) - { - sensor_error_c_ = val; - } - - private: - /** The method to use to compute the planes */ - int method_; - /** The size of the blocks to look at for a stable MSE */ - int block_size_; - /** The minimum size of a cluster to be considered a plane */ - int min_size_; - /** How far a point can be from a plane to belong to it (in meters) */ - double threshold_; - /** coefficient of the sensor error with respect to the. All 0 by default but you want a=0.0075 for a Kinect */ - double sensor_error_a_, sensor_error_b_, sensor_error_c_; - }; - - /** Object that contains a frame data. - */ - struct CV_EXPORTS RgbdFrame - { - RgbdFrame(); - RgbdFrame(const Mat& image, const Mat& depth, const Mat& mask=Mat(), const Mat& normals=Mat(), int ID=-1); - virtual ~RgbdFrame(); - - virtual void - release(); - - int ID; - Mat image; - Mat depth; - Mat mask; - Mat normals; - }; - - /** Object that contains a frame data that is possibly needed for the Odometry. - * It's used for the efficiency (to pass precomputed/cached data of the frame that participates - * in the Odometry processing several times). - */ - struct CV_EXPORTS OdometryFrame : public RgbdFrame - { - /** These constants are used to set a type of cache which has to be prepared depending on the frame role: - * srcFrame or dstFrame (see compute method of the Odometry class). For the srcFrame and dstFrame different cache data may be required, - * some part of a cache may be common for both frame roles. - * @param CACHE_SRC The cache data for the srcFrame will be prepared. - * @param CACHE_DST The cache data for the dstFrame will be prepared. - * @param CACHE_ALL The cache data for both srcFrame and dstFrame roles will be computed. - */ - enum - { - CACHE_SRC = 1, CACHE_DST = 2, CACHE_ALL = CACHE_SRC + CACHE_DST - }; - - OdometryFrame(); - OdometryFrame(const Mat& image, const Mat& depth, const Mat& mask=Mat(), const Mat& normals=Mat(), int ID=-1); - - virtual void - release(); - - void - releasePyramids(); - - std::vector pyramidImage; - std::vector pyramidDepth; - std::vector pyramidMask; - - std::vector pyramidCloud; - - std::vector pyramid_dI_dx; - std::vector pyramid_dI_dy; - std::vector pyramidTexturedMask; - - std::vector pyramidNormals; - std::vector pyramidNormalsMask; - }; - - /** Base class for computation of odometry. - */ - class CV_EXPORTS Odometry: public Algorithm - { - public: - - /** A class of transformation*/ - enum - { - ROTATION = 1, TRANSLATION = 2, RIGID_BODY_MOTION = 4 - }; - - static inline float - DEFAULT_MIN_DEPTH() - { - return 0.f; // in meters - } - static inline float - DEFAULT_MAX_DEPTH() - { - return 4.f; // in meters - } - static inline float - DEFAULT_MAX_DEPTH_DIFF() - { - return 0.07f; // in meters - } - static inline float - DEFAULT_MAX_POINTS_PART() - { - return 0.07f; // in [0, 1] - } - static inline float - DEFAULT_MAX_TRANSLATION() - { - return 0.15f; // in meters - } - static inline float - DEFAULT_MAX_ROTATION() - { - return 15; // in degrees - } - - /** Method to compute a transformation from the source frame to the destination one. - * Some odometry algorithms do not used some data of frames (eg. ICP does not use images). - * In such case corresponding arguments can be set as empty Mat. - * The method returns true if all internal computions were possible (e.g. there were enough correspondences, - * system of equations has a solution, etc) and resulting transformation satisfies some test if it's provided - * by the Odometry inheritor implementation (e.g. thresholds for maximum translation and rotation). - * @param srcImage Image data of the source frame (CV_8UC1) - * @param srcDepth Depth data of the source frame (CV_32FC1, in meters) - * @param srcMask Mask that sets which pixels have to be used from the source frame (CV_8UC1) - * @param dstImage Image data of the destination frame (CV_8UC1) - * @param dstDepth Depth data of the destination frame (CV_32FC1, in meters) - * @param dstMask Mask that sets which pixels have to be used from the destination frame (CV_8UC1) - * @param Rt Resulting transformation from the source frame to the destination one (rigid body motion): - dst_p = Rt * src_p, where dst_p is a homogeneous point in the destination frame and src_p is - homogeneous point in the source frame, - Rt is 4x4 matrix of CV_64FC1 type. - * @param initRt Initial transformation from the source frame to the destination one (optional) - */ - bool - compute(const Mat& srcImage, const Mat& srcDepth, const Mat& srcMask, const Mat& dstImage, const Mat& dstDepth, - const Mat& dstMask, Mat& Rt, const Mat& initRt = Mat()) const; - - /** One more method to compute a transformation from the source frame to the destination one. - * It is designed to save on computing the frame data (image pyramids, normals, etc.). - */ - bool - compute(Ptr& srcFrame, Ptr& dstFrame, Mat& Rt, const Mat& initRt = Mat()) const; - - /** Prepare a cache for the frame. The function checks the precomputed/passed data (throws the error if this data - * does not satisfy) and computes all remaining cache data needed for the frame. Returned size is a resolution - * of the prepared frame. - * @param frame The odometry which will process the frame. - * @param cacheType The cache type: CACHE_SRC, CACHE_DST or CACHE_ALL. - */ - virtual Size prepareFrameCache(Ptr& frame, int cacheType) const; - - static Ptr create(const String & odometryType); - - /** @see setCameraMatrix */ - virtual cv::Mat getCameraMatrix() const = 0; - /** @copybrief getCameraMatrix @see getCameraMatrix */ - virtual void setCameraMatrix(const cv::Mat &val) = 0; - /** @see setTransformType */ - virtual int getTransformType() const = 0; - /** @copybrief getTransformType @see getTransformType */ - virtual void setTransformType(int val) = 0; - - protected: - virtual void - checkParams() const = 0; - - virtual bool - computeImpl(const Ptr& srcFrame, const Ptr& dstFrame, Mat& Rt, - const Mat& initRt) const = 0; - }; - - /** Odometry based on the paper "Real-Time Visual Odometry from Dense RGB-D Images", - * F. Steinbucker, J. Strum, D. Cremers, ICCV, 2011. - */ - class CV_EXPORTS RgbdOdometry: public Odometry - { - public: - RgbdOdometry(); - /** Constructor. - * @param cameraMatrix Camera matrix - * @param minDepth Pixels with depth less than minDepth will not be used (in meters) - * @param maxDepth Pixels with depth larger than maxDepth will not be used (in meters) - * @param maxDepthDiff Correspondences between pixels of two given frames will be filtered out - * if their depth difference is larger than maxDepthDiff (in meters) - * @param iterCounts Count of iterations on each pyramid level. - * @param minGradientMagnitudes For each pyramid level the pixels will be filtered out - * if they have gradient magnitude less than minGradientMagnitudes[level]. - * @param maxPointsPart The method uses a random pixels subset of size frameWidth x frameHeight x pointsPart - * @param transformType Class of transformation - */ - RgbdOdometry(const Mat& cameraMatrix, float minDepth = DEFAULT_MIN_DEPTH(), float maxDepth = DEFAULT_MAX_DEPTH(), - float maxDepthDiff = DEFAULT_MAX_DEPTH_DIFF(), const std::vector& iterCounts = std::vector(), - const std::vector& minGradientMagnitudes = std::vector(), float maxPointsPart = DEFAULT_MAX_POINTS_PART(), - int transformType = RIGID_BODY_MOTION); - - virtual Size prepareFrameCache(Ptr& frame, int cacheType) const; - - cv::Mat getCameraMatrix() const - { - return cameraMatrix; - } - void setCameraMatrix(const cv::Mat &val) - { - cameraMatrix = val; - } - double getMinDepth() const - { - return minDepth; - } - void setMinDepth(double val) - { - minDepth = val; - } - double getMaxDepth() const - { - return maxDepth; - } - void setMaxDepth(double val) - { - maxDepth = val; - } - double getMaxDepthDiff() const - { - return maxDepthDiff; - } - void setMaxDepthDiff(double val) - { - maxDepthDiff = val; - } - cv::Mat getIterationCounts() const - { - return iterCounts; - } - void setIterationCounts(const cv::Mat &val) - { - iterCounts = val; - } - cv::Mat getMinGradientMagnitudes() const - { - return minGradientMagnitudes; - } - void setMinGradientMagnitudes(const cv::Mat &val) - { - minGradientMagnitudes = val; - } - double getMaxPointsPart() const - { - return maxPointsPart; - } - void setMaxPointsPart(double val) - { - maxPointsPart = val; - } - int getTransformType() const - { - return transformType; - } - void setTransformType(int val) - { - transformType = val; - } - double getMaxTranslation() const - { - return maxTranslation; - } - void setMaxTranslation(double val) - { - maxTranslation = val; - } - double getMaxRotation() const - { - return maxRotation; - } - void setMaxRotation(double val) - { - maxRotation = val; - } - - protected: - virtual void - checkParams() const; - - virtual bool - computeImpl(const Ptr& srcFrame, const Ptr& dstFrame, Mat& Rt, - const Mat& initRt) const; - - // Some params have commented desired type. It's due to AlgorithmInfo::addParams does not support it now. - /*float*/ - double minDepth, maxDepth, maxDepthDiff; - /*vector*/ - Mat iterCounts; - /*vector*/ - Mat minGradientMagnitudes; - double maxPointsPart; - - Mat cameraMatrix; - int transformType; - - double maxTranslation, maxRotation; - }; - - /** Odometry based on the paper "KinectFusion: Real-Time Dense Surface Mapping and Tracking", - * Richard A. Newcombe, Andrew Fitzgibbon, at al, SIGGRAPH, 2011. - */ - class ICPOdometry: public Odometry - { - public: - ICPOdometry(); - /** Constructor. - * @param cameraMatrix Camera matrix - * @param minDepth Pixels with depth less than minDepth will not be used - * @param maxDepth Pixels with depth larger than maxDepth will not be used - * @param maxDepthDiff Correspondences between pixels of two given frames will be filtered out - * if their depth difference is larger than maxDepthDiff - * @param maxPointsPart The method uses a random pixels subset of size frameWidth x frameHeight x pointsPart - * @param iterCounts Count of iterations on each pyramid level. - * @param transformType Class of trasformation - */ - ICPOdometry(const Mat& cameraMatrix, float minDepth = DEFAULT_MIN_DEPTH(), float maxDepth = DEFAULT_MAX_DEPTH(), - float maxDepthDiff = DEFAULT_MAX_DEPTH_DIFF(), float maxPointsPart = DEFAULT_MAX_POINTS_PART(), - const std::vector& iterCounts = std::vector(), int transformType = RIGID_BODY_MOTION); - - virtual Size prepareFrameCache(Ptr& frame, int cacheType) const; - - cv::Mat getCameraMatrix() const - { - return cameraMatrix; - } - void setCameraMatrix(const cv::Mat &val) - { - cameraMatrix = val; - } - double getMinDepth() const - { - return minDepth; - } - void setMinDepth(double val) - { - minDepth = val; - } - double getMaxDepth() const - { - return maxDepth; - } - void setMaxDepth(double val) - { - maxDepth = val; - } - double getMaxDepthDiff() const - { - return maxDepthDiff; - } - void setMaxDepthDiff(double val) - { - maxDepthDiff = val; - } - cv::Mat getIterationCounts() const - { - return iterCounts; - } - void setIterationCounts(const cv::Mat &val) - { - iterCounts = val; - } - double getMaxPointsPart() const - { - return maxPointsPart; - } - void setMaxPointsPart(double val) - { - maxPointsPart = val; - } - int getTransformType() const - { - return transformType; - } - void setTransformType(int val) - { - transformType = val; - } - double getMaxTranslation() const - { - return maxTranslation; - } - void setMaxTranslation(double val) - { - maxTranslation = val; - } - double getMaxRotation() const - { - return maxRotation; - } - void setMaxRotation(double val) - { - maxRotation = val; - } - Ptr getNormalsComputer() const - { - return normalsComputer; - } - - protected: - virtual void - checkParams() const; - - virtual bool - computeImpl(const Ptr& srcFrame, const Ptr& dstFrame, Mat& Rt, - const Mat& initRt) const; - - // Some params have commented desired type. It's due to AlgorithmInfo::addParams does not support it now. - /*float*/ - double minDepth, maxDepth, maxDepthDiff; - /*float*/ - double maxPointsPart; - /*vector*/ - Mat iterCounts; - - Mat cameraMatrix; - int transformType; - - double maxTranslation, maxRotation; - - mutable Ptr normalsComputer; - }; - - /** Odometry that merges RgbdOdometry and ICPOdometry by minimize sum of their energy functions. - */ - - class RgbdICPOdometry: public Odometry - { - public: - RgbdICPOdometry(); - /** Constructor. - * @param cameraMatrix Camera matrix - * @param minDepth Pixels with depth less than minDepth will not be used - * @param maxDepth Pixels with depth larger than maxDepth will not be used - * @param maxDepthDiff Correspondences between pixels of two given frames will be filtered out - * if their depth difference is larger than maxDepthDiff - * @param maxPointsPart The method uses a random pixels subset of size frameWidth x frameHeight x pointsPart - * @param iterCounts Count of iterations on each pyramid level. - * @param minGradientMagnitudes For each pyramid level the pixels will be filtered out - * if they have gradient magnitude less than minGradientMagnitudes[level]. - * @param transformType Class of trasformation - */ - RgbdICPOdometry(const Mat& cameraMatrix, float minDepth = DEFAULT_MIN_DEPTH(), float maxDepth = DEFAULT_MAX_DEPTH(), - float maxDepthDiff = DEFAULT_MAX_DEPTH_DIFF(), float maxPointsPart = DEFAULT_MAX_POINTS_PART(), - const std::vector& iterCounts = std::vector(), - const std::vector& minGradientMagnitudes = std::vector(), - int transformType = RIGID_BODY_MOTION); - - virtual Size prepareFrameCache(Ptr& frame, int cacheType) const; - - cv::Mat getCameraMatrix() const - { - return cameraMatrix; - } - void setCameraMatrix(const cv::Mat &val) - { - cameraMatrix = val; - } - double getMinDepth() const - { - return minDepth; - } - void setMinDepth(double val) - { - minDepth = val; - } - double getMaxDepth() const - { - return maxDepth; - } - void setMaxDepth(double val) - { - maxDepth = val; - } - double getMaxDepthDiff() const - { - return maxDepthDiff; - } - void setMaxDepthDiff(double val) - { - maxDepthDiff = val; - } - double getMaxPointsPart() const - { - return maxPointsPart; - } - void setMaxPointsPart(double val) - { - maxPointsPart = val; - } - cv::Mat getIterationCounts() const - { - return iterCounts; - } - void setIterationCounts(const cv::Mat &val) - { - iterCounts = val; - } - cv::Mat getMinGradientMagnitudes() const - { - return minGradientMagnitudes; - } - void setMinGradientMagnitudes(const cv::Mat &val) - { - minGradientMagnitudes = val; - } - int getTransformType() const - { - return transformType; - } - void setTransformType(int val) - { - transformType = val; - } - double getMaxTranslation() const - { - return maxTranslation; - } - void setMaxTranslation(double val) - { - maxTranslation = val; - } - double getMaxRotation() const - { - return maxRotation; - } - void setMaxRotation(double val) - { - maxRotation = val; - } - Ptr getNormalsComputer() const - { - return normalsComputer; - } - - protected: - virtual void - checkParams() const; - - virtual bool - computeImpl(const Ptr& srcFrame, const Ptr& dstFrame, Mat& Rt, - const Mat& initRt) const; - - // Some params have commented desired type. It's due to AlgorithmInfo::addParams does not support it now. - /*float*/ - double minDepth, maxDepth, maxDepthDiff; - /*float*/ - double maxPointsPart; - /*vector*/ - Mat iterCounts; - /*vector*/ - Mat minGradientMagnitudes; - - Mat cameraMatrix; - int transformType; - - double maxTranslation, maxRotation; - - mutable Ptr normalsComputer; - }; - - /** Warp the image: compute 3d points from the depth, transform them using given transformation, - * then project color point cloud to an image plane. - * This function can be used to visualize results of the Odometry algorithm. - * @param image The image (of CV_8UC1 or CV_8UC3 type) - * @param depth The depth (of type used in depthTo3d fuction) - * @param mask The mask of used pixels (of CV_8UC1), it can be empty - * @param Rt The transformation that will be applied to the 3d points computed from the depth - * @param cameraMatrix Camera matrix - * @param distCoeff Distortion coefficients - * @param warpedImage The warped image. - * @param warpedDepth The warped depth. - * @param warpedMask The warped mask. - */ - CV_EXPORTS - void - warpFrame(const Mat& image, const Mat& depth, const Mat& mask, const Mat& Rt, const Mat& cameraMatrix, - const Mat& distCoeff, Mat& warpedImage, Mat* warpedDepth = 0, Mat* warpedMask = 0); - -// TODO Depth interpolation -// Curvature -// Get rescaleDepth return dubles if asked for - -//! @} - -} /* namespace rgbd */ -} /* namespace cv */ - -#include "opencv2/rgbd/linemod.hpp" - -#endif /* __cplusplus */ #endif /* End of file. */ - diff --git a/IPL/include/opencv/opencv2/rgbd/depth.hpp b/IPL/include/opencv/opencv2/rgbd/depth.hpp new file mode 100644 index 0000000..94fdca6 --- /dev/null +++ b/IPL/include/opencv/opencv2/rgbd/depth.hpp @@ -0,0 +1,1192 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +// This code is also subject to the license terms in the LICENSE_WillowGarage.md file found in this module's directory + +#ifndef __OPENCV_RGBD_DEPTH_HPP__ +#define __OPENCV_RGBD_DEPTH_HPP__ + +#include +#include + +namespace cv +{ +namespace rgbd +{ + +//! @addtogroup rgbd +//! @{ + + /** Checks if the value is a valid depth. For CV_16U or CV_16S, the convention is to be invalid if it is + * a limit. For a float/double, we just check if it is a NaN + * @param depth the depth to check for validity + */ + CV_EXPORTS + inline bool + isValidDepth(const float & depth) + { + return !cvIsNaN(depth); + } + CV_EXPORTS + inline bool + isValidDepth(const double & depth) + { + return !cvIsNaN(depth); + } + CV_EXPORTS + inline bool + isValidDepth(const short int & depth) + { + return (depth != std::numeric_limits::min()) && (depth != std::numeric_limits::max()); + } + CV_EXPORTS + inline bool + isValidDepth(const unsigned short int & depth) + { + return (depth != std::numeric_limits::min()) + && (depth != std::numeric_limits::max()); + } + CV_EXPORTS + inline bool + isValidDepth(const int & depth) + { + return (depth != std::numeric_limits::min()) && (depth != std::numeric_limits::max()); + } + CV_EXPORTS + inline bool + isValidDepth(const unsigned int & depth) + { + return (depth != std::numeric_limits::min()) && (depth != std::numeric_limits::max()); + } + + /** Object that can compute the normals in an image. + * It is an object as it can cache data for speed efficiency + * The implemented methods are either: + * - FALS (the fastest) and SRI from + * ``Fast and Accurate Computation of Surface Normals from Range Images`` + * by H. Badino, D. Huber, Y. Park and T. Kanade + * - the normals with bilateral filtering on a depth image from + * ``Gradient Response Maps for Real-Time Detection of Texture-Less Objects`` + * by S. Hinterstoisser, C. Cagniart, S. Ilic, P. Sturm, N. Navab, P. Fua, and V. Lepetit + */ + class CV_EXPORTS_W RgbdNormals: public Algorithm + { + public: + enum RGBD_NORMALS_METHOD + { + RGBD_NORMALS_METHOD_FALS = 0, + RGBD_NORMALS_METHOD_LINEMOD = 1, + RGBD_NORMALS_METHOD_SRI = 2 + }; + + RgbdNormals() + : + rows_(0), + cols_(0), + depth_(0), + K_(Mat()), + window_size_(0), + method_(RGBD_NORMALS_METHOD_FALS), + rgbd_normals_impl_(0) + { + } + + /** Constructor + * @param rows the number of rows of the depth image normals will be computed on + * @param cols the number of cols of the depth image normals will be computed on + * @param depth the depth of the normals (only CV_32F or CV_64F) + * @param K the calibration matrix to use + * @param window_size the window size to compute the normals: can only be 1,3,5 or 7 + * @param method one of the methods to use: RGBD_NORMALS_METHOD_SRI, RGBD_NORMALS_METHOD_FALS + */ + RgbdNormals(int rows, int cols, int depth, InputArray K, int window_size = 5, int method = + RgbdNormals::RGBD_NORMALS_METHOD_FALS); + + ~RgbdNormals(); + + CV_WRAP static Ptr create(int rows, int cols, int depth, InputArray K, int window_size = 5, int method = + RgbdNormals::RGBD_NORMALS_METHOD_FALS); + + /** Given a set of 3d points in a depth image, compute the normals at each point. + * @param points a rows x cols x 3 matrix of CV_32F/CV64F or a rows x cols x 1 CV_U16S + * @param normals a rows x cols x 3 matrix + */ + CV_WRAP_AS(apply) void + operator()(InputArray points, OutputArray normals) const; + + /** Initializes some data that is cached for later computation + * If that function is not called, it will be called the first time normals are computed + */ + CV_WRAP void + initialize() const; + + CV_WRAP int getRows() const + { + return rows_; + } + CV_WRAP void setRows(int val) + { + rows_ = val; + } + CV_WRAP int getCols() const + { + return cols_; + } + CV_WRAP void setCols(int val) + { + cols_ = val; + } + CV_WRAP int getWindowSize() const + { + return window_size_; + } + CV_WRAP void setWindowSize(int val) + { + window_size_ = val; + } + CV_WRAP int getDepth() const + { + return depth_; + } + CV_WRAP void setDepth(int val) + { + depth_ = val; + } + CV_WRAP cv::Mat getK() const + { + return K_; + } + CV_WRAP void setK(const cv::Mat &val) + { + K_ = val; + } + CV_WRAP int getMethod() const + { + return method_; + } + CV_WRAP void setMethod(int val) + { + method_ = val; + } + + protected: + void + initialize_normals_impl(int rows, int cols, int depth, const Mat & K, int window_size, int method) const; + + int rows_, cols_, depth_; + Mat K_; + int window_size_; + int method_; + mutable void* rgbd_normals_impl_; + }; + + /** Object that can clean a noisy depth image + */ + class CV_EXPORTS_W DepthCleaner: public Algorithm + { + public: + /** NIL method is from + * ``Modeling Kinect Sensor Noise for Improved 3d Reconstruction and Tracking`` + * by C. Nguyen, S. Izadi, D. Lovel + */ + enum DEPTH_CLEANER_METHOD + { + DEPTH_CLEANER_NIL + }; + + DepthCleaner() + : + depth_(0), + window_size_(0), + method_(DEPTH_CLEANER_NIL), + depth_cleaner_impl_(0) + { + } + + /** Constructor + * @param depth the depth of the normals (only CV_32F or CV_64F) + * @param window_size the window size to compute the normals: can only be 1,3,5 or 7 + * @param method one of the methods to use: RGBD_NORMALS_METHOD_SRI, RGBD_NORMALS_METHOD_FALS + */ + DepthCleaner(int depth, int window_size = 5, int method = DepthCleaner::DEPTH_CLEANER_NIL); + + ~DepthCleaner(); + + CV_WRAP static Ptr create(int depth, int window_size = 5, int method = DepthCleaner::DEPTH_CLEANER_NIL); + + /** Given a set of 3d points in a depth image, compute the normals at each point. + * @param points a rows x cols x 3 matrix of CV_32F/CV64F or a rows x cols x 1 CV_U16S + * @param depth a rows x cols matrix of the cleaned up depth + */ + CV_WRAP_AS(apply) void + operator()(InputArray points, OutputArray depth) const; + + /** Initializes some data that is cached for later computation + * If that function is not called, it will be called the first time normals are computed + */ + CV_WRAP void + initialize() const; + + CV_WRAP int getWindowSize() const + { + return window_size_; + } + CV_WRAP void setWindowSize(int val) + { + window_size_ = val; + } + CV_WRAP int getDepth() const + { + return depth_; + } + CV_WRAP void setDepth(int val) + { + depth_ = val; + } + CV_WRAP int getMethod() const + { + return method_; + } + CV_WRAP void setMethod(int val) + { + method_ = val; + } + + protected: + void + initialize_cleaner_impl() const; + + int depth_; + int window_size_; + int method_; + mutable void* depth_cleaner_impl_; + }; + + + /** Registers depth data to an external camera + * Registration is performed by creating a depth cloud, transforming the cloud by + * the rigid body transformation between the cameras, and then projecting the + * transformed points into the RGB camera. + * + * uv_rgb = K_rgb * [R | t] * z * inv(K_ir) * uv_ir + * + * Currently does not check for negative depth values. + * + * @param unregisteredCameraMatrix the camera matrix of the depth camera + * @param registeredCameraMatrix the camera matrix of the external camera + * @param registeredDistCoeffs the distortion coefficients of the external camera + * @param Rt the rigid body transform between the cameras. Transforms points from depth camera frame to external camera frame. + * @param unregisteredDepth the input depth data + * @param outputImagePlaneSize the image plane dimensions of the external camera (width, height) + * @param registeredDepth the result of transforming the depth into the external camera + * @param depthDilation whether or not the depth is dilated to avoid holes and occlusion errors (optional) + */ + CV_EXPORTS_W + void + registerDepth(InputArray unregisteredCameraMatrix, InputArray registeredCameraMatrix, InputArray registeredDistCoeffs, + InputArray Rt, InputArray unregisteredDepth, const Size& outputImagePlaneSize, + OutputArray registeredDepth, bool depthDilation=false); + + /** + * @param depth the depth image + * @param in_K + * @param in_points the list of xy coordinates + * @param points3d the resulting 3d points + */ + CV_EXPORTS_W + void + depthTo3dSparse(InputArray depth, InputArray in_K, InputArray in_points, OutputArray points3d); + + /** Converts a depth image to an organized set of 3d points. + * The coordinate system is x pointing left, y down and z away from the camera + * @param depth the depth image (if given as short int CV_U, it is assumed to be the depth in millimeters + * (as done with the Microsoft Kinect), otherwise, if given as CV_32F or CV_64F, it is assumed in meters) + * @param K The calibration matrix + * @param points3d the resulting 3d points. They are of depth the same as `depth` if it is CV_32F or CV_64F, and the + * depth of `K` if `depth` is of depth CV_U + * @param mask the mask of the points to consider (can be empty) + */ + CV_EXPORTS_W + void + depthTo3d(InputArray depth, InputArray K, OutputArray points3d, InputArray mask = noArray()); + + /** If the input image is of type CV_16UC1 (like the Kinect one), the image is converted to floats, divided + * by 1000 to get a depth in meters, and the values 0 are converted to std::numeric_limits::quiet_NaN() + * Otherwise, the image is simply converted to floats + * @param in the depth image (if given as short int CV_U, it is assumed to be the depth in millimeters + * (as done with the Microsoft Kinect), it is assumed in meters) + * @param depth the desired output depth (floats or double) + * @param out The rescaled float depth image + */ + CV_EXPORTS_W + void + rescaleDepth(InputArray in, int depth, OutputArray out); + + /** Object that can compute planes in an image + */ + class CV_EXPORTS_W RgbdPlane: public Algorithm + { + public: + enum RGBD_PLANE_METHOD + { + RGBD_PLANE_METHOD_DEFAULT + }; + + RgbdPlane(int method = RgbdPlane::RGBD_PLANE_METHOD_DEFAULT) + : + method_(method), + block_size_(40), + min_size_(block_size_*block_size_), + threshold_(0.01), + sensor_error_a_(0), + sensor_error_b_(0), + sensor_error_c_(0) + { + } + + /** Constructor + * @param block_size The size of the blocks to look at for a stable MSE + * @param min_size The minimum size of a cluster to be considered a plane + * @param threshold The maximum distance of a point from a plane to belong to it (in meters) + * @param sensor_error_a coefficient of the sensor error. 0 by default, 0.0075 for a Kinect + * @param sensor_error_b coefficient of the sensor error. 0 by default + * @param sensor_error_c coefficient of the sensor error. 0 by default + * @param method The method to use to compute the planes. + */ + RgbdPlane(int method, int block_size, + int min_size, double threshold, double sensor_error_a = 0, + double sensor_error_b = 0, double sensor_error_c = 0); + + ~RgbdPlane(); + + CV_WRAP static Ptr create(int method, int block_size, int min_size, double threshold, + double sensor_error_a = 0, double sensor_error_b = 0, + double sensor_error_c = 0); + + /** Find The planes in a depth image + * @param points3d the 3d points organized like the depth image: rows x cols with 3 channels + * @param normals the normals for every point in the depth image + * @param mask An image where each pixel is labeled with the plane it belongs to + * and 255 if it does not belong to any plane + * @param plane_coefficients the coefficients of the corresponding planes (a,b,c,d) such that ax+by+cz+d=0, norm(a,b,c)=1 + * and c < 0 (so that the normal points towards the camera) + */ + CV_WRAP_AS(apply) void + operator()(InputArray points3d, InputArray normals, OutputArray mask, + OutputArray plane_coefficients); + + /** Find The planes in a depth image but without doing a normal check, which is faster but less accurate + * @param points3d the 3d points organized like the depth image: rows x cols with 3 channels + * @param mask An image where each pixel is labeled with the plane it belongs to + * and 255 if it does not belong to any plane + * @param plane_coefficients the coefficients of the corresponding planes (a,b,c,d) such that ax+by+cz+d=0 + */ + CV_WRAP_AS(apply) void + operator()(InputArray points3d, OutputArray mask, OutputArray plane_coefficients); + + CV_WRAP int getBlockSize() const + { + return block_size_; + } + CV_WRAP void setBlockSize(int val) + { + block_size_ = val; + } + CV_WRAP int getMinSize() const + { + return min_size_; + } + CV_WRAP void setMinSize(int val) + { + min_size_ = val; + } + CV_WRAP int getMethod() const + { + return method_; + } + CV_WRAP void setMethod(int val) + { + method_ = val; + } + CV_WRAP double getThreshold() const + { + return threshold_; + } + CV_WRAP void setThreshold(double val) + { + threshold_ = val; + } + CV_WRAP double getSensorErrorA() const + { + return sensor_error_a_; + } + CV_WRAP void setSensorErrorA(double val) + { + sensor_error_a_ = val; + } + CV_WRAP double getSensorErrorB() const + { + return sensor_error_b_; + } + CV_WRAP void setSensorErrorB(double val) + { + sensor_error_b_ = val; + } + CV_WRAP double getSensorErrorC() const + { + return sensor_error_c_; + } + CV_WRAP void setSensorErrorC(double val) + { + sensor_error_c_ = val; + } + + private: + /** The method to use to compute the planes */ + int method_; + /** The size of the blocks to look at for a stable MSE */ + int block_size_; + /** The minimum size of a cluster to be considered a plane */ + int min_size_; + /** How far a point can be from a plane to belong to it (in meters) */ + double threshold_; + /** coefficient of the sensor error with respect to the. All 0 by default but you want a=0.0075 for a Kinect */ + double sensor_error_a_, sensor_error_b_, sensor_error_c_; + }; + + /** Object that contains a frame data. + */ + struct CV_EXPORTS_W RgbdFrame + { + RgbdFrame(); + RgbdFrame(const Mat& image, const Mat& depth, const Mat& mask=Mat(), const Mat& normals=Mat(), int ID=-1); + virtual ~RgbdFrame(); + + CV_WRAP static Ptr create(const Mat& image=Mat(), const Mat& depth=Mat(), const Mat& mask=Mat(), const Mat& normals=Mat(), int ID=-1); + + CV_WRAP virtual void + release(); + + CV_PROP int ID; + CV_PROP Mat image; + CV_PROP Mat depth; + CV_PROP Mat mask; + CV_PROP Mat normals; + }; + + /** Object that contains a frame data that is possibly needed for the Odometry. + * It's used for the efficiency (to pass precomputed/cached data of the frame that participates + * in the Odometry processing several times). + */ + struct CV_EXPORTS_W OdometryFrame : public RgbdFrame + { + /** These constants are used to set a type of cache which has to be prepared depending on the frame role: + * srcFrame or dstFrame (see compute method of the Odometry class). For the srcFrame and dstFrame different cache data may be required, + * some part of a cache may be common for both frame roles. + * @param CACHE_SRC The cache data for the srcFrame will be prepared. + * @param CACHE_DST The cache data for the dstFrame will be prepared. + * @param CACHE_ALL The cache data for both srcFrame and dstFrame roles will be computed. + */ + enum + { + CACHE_SRC = 1, CACHE_DST = 2, CACHE_ALL = CACHE_SRC + CACHE_DST + }; + + OdometryFrame(); + OdometryFrame(const Mat& image, const Mat& depth, const Mat& mask=Mat(), const Mat& normals=Mat(), int ID=-1); + + CV_WRAP static Ptr create(const Mat& image=Mat(), const Mat& depth=Mat(), const Mat& mask=Mat(), const Mat& normals=Mat(), int ID=-1); + + CV_WRAP virtual void + release() CV_OVERRIDE; + + CV_WRAP void + releasePyramids(); + + CV_PROP std::vector pyramidImage; + CV_PROP std::vector pyramidDepth; + CV_PROP std::vector pyramidMask; + + CV_PROP std::vector pyramidCloud; + + CV_PROP std::vector pyramid_dI_dx; + CV_PROP std::vector pyramid_dI_dy; + CV_PROP std::vector pyramidTexturedMask; + + CV_PROP std::vector pyramidNormals; + CV_PROP std::vector pyramidNormalsMask; + }; + + /** Base class for computation of odometry. + */ + class CV_EXPORTS_W Odometry: public Algorithm + { + public: + + /** A class of transformation*/ + enum + { + ROTATION = 1, TRANSLATION = 2, RIGID_BODY_MOTION = 4 + }; + + CV_WRAP static inline float + DEFAULT_MIN_DEPTH() + { + return 0.f; // in meters + } + CV_WRAP static inline float + DEFAULT_MAX_DEPTH() + { + return 4.f; // in meters + } + CV_WRAP static inline float + DEFAULT_MAX_DEPTH_DIFF() + { + return 0.07f; // in meters + } + CV_WRAP static inline float + DEFAULT_MAX_POINTS_PART() + { + return 0.07f; // in [0, 1] + } + CV_WRAP static inline float + DEFAULT_MAX_TRANSLATION() + { + return 0.15f; // in meters + } + CV_WRAP static inline float + DEFAULT_MAX_ROTATION() + { + return 15; // in degrees + } + + /** Method to compute a transformation from the source frame to the destination one. + * Some odometry algorithms do not used some data of frames (eg. ICP does not use images). + * In such case corresponding arguments can be set as empty Mat. + * The method returns true if all internal computions were possible (e.g. there were enough correspondences, + * system of equations has a solution, etc) and resulting transformation satisfies some test if it's provided + * by the Odometry inheritor implementation (e.g. thresholds for maximum translation and rotation). + * @param srcImage Image data of the source frame (CV_8UC1) + * @param srcDepth Depth data of the source frame (CV_32FC1, in meters) + * @param srcMask Mask that sets which pixels have to be used from the source frame (CV_8UC1) + * @param dstImage Image data of the destination frame (CV_8UC1) + * @param dstDepth Depth data of the destination frame (CV_32FC1, in meters) + * @param dstMask Mask that sets which pixels have to be used from the destination frame (CV_8UC1) + * @param Rt Resulting transformation from the source frame to the destination one (rigid body motion): + dst_p = Rt * src_p, where dst_p is a homogeneous point in the destination frame and src_p is + homogeneous point in the source frame, + Rt is 4x4 matrix of CV_64FC1 type. + * @param initRt Initial transformation from the source frame to the destination one (optional) + */ + CV_WRAP bool + compute(const Mat& srcImage, const Mat& srcDepth, const Mat& srcMask, const Mat& dstImage, const Mat& dstDepth, + const Mat& dstMask, OutputArray Rt, const Mat& initRt = Mat()) const; + + /** One more method to compute a transformation from the source frame to the destination one. + * It is designed to save on computing the frame data (image pyramids, normals, etc.). + */ + CV_WRAP_AS(compute2) bool + compute(Ptr& srcFrame, Ptr& dstFrame, OutputArray Rt, const Mat& initRt = Mat()) const; + + /** Prepare a cache for the frame. The function checks the precomputed/passed data (throws the error if this data + * does not satisfy) and computes all remaining cache data needed for the frame. Returned size is a resolution + * of the prepared frame. + * @param frame The odometry which will process the frame. + * @param cacheType The cache type: CACHE_SRC, CACHE_DST or CACHE_ALL. + */ + CV_WRAP virtual Size prepareFrameCache(Ptr& frame, int cacheType) const; + + CV_WRAP static Ptr create(const String & odometryType); + + /** @see setCameraMatrix */ + CV_WRAP virtual cv::Mat getCameraMatrix() const = 0; + /** @copybrief getCameraMatrix @see getCameraMatrix */ + CV_WRAP virtual void setCameraMatrix(const cv::Mat &val) = 0; + /** @see setTransformType */ + CV_WRAP virtual int getTransformType() const = 0; + /** @copybrief getTransformType @see getTransformType */ + CV_WRAP virtual void setTransformType(int val) = 0; + + protected: + virtual void + checkParams() const = 0; + + virtual bool + computeImpl(const Ptr& srcFrame, const Ptr& dstFrame, OutputArray Rt, + const Mat& initRt) const = 0; + }; + + /** Odometry based on the paper "Real-Time Visual Odometry from Dense RGB-D Images", + * F. Steinbucker, J. Strum, D. Cremers, ICCV, 2011. + */ + class CV_EXPORTS_W RgbdOdometry: public Odometry + { + public: + RgbdOdometry(); + /** Constructor. + * @param cameraMatrix Camera matrix + * @param minDepth Pixels with depth less than minDepth will not be used (in meters) + * @param maxDepth Pixels with depth larger than maxDepth will not be used (in meters) + * @param maxDepthDiff Correspondences between pixels of two given frames will be filtered out + * if their depth difference is larger than maxDepthDiff (in meters) + * @param iterCounts Count of iterations on each pyramid level. + * @param minGradientMagnitudes For each pyramid level the pixels will be filtered out + * if they have gradient magnitude less than minGradientMagnitudes[level]. + * @param maxPointsPart The method uses a random pixels subset of size frameWidth x frameHeight x pointsPart + * @param transformType Class of transformation + */ + RgbdOdometry(const Mat& cameraMatrix, float minDepth = Odometry::DEFAULT_MIN_DEPTH(), float maxDepth = Odometry::DEFAULT_MAX_DEPTH(), + float maxDepthDiff = Odometry::DEFAULT_MAX_DEPTH_DIFF(), const std::vector& iterCounts = std::vector(), + const std::vector& minGradientMagnitudes = std::vector(), float maxPointsPart = Odometry::DEFAULT_MAX_POINTS_PART(), + int transformType = Odometry::RIGID_BODY_MOTION); + + CV_WRAP static Ptr create(const Mat& cameraMatrix = Mat(), float minDepth = Odometry::DEFAULT_MIN_DEPTH(), float maxDepth = Odometry::DEFAULT_MAX_DEPTH(), + float maxDepthDiff = Odometry::DEFAULT_MAX_DEPTH_DIFF(), const std::vector& iterCounts = std::vector(), + const std::vector& minGradientMagnitudes = std::vector(), float maxPointsPart = Odometry::DEFAULT_MAX_POINTS_PART(), + int transformType = Odometry::RIGID_BODY_MOTION); + + CV_WRAP virtual Size prepareFrameCache(Ptr& frame, int cacheType) const CV_OVERRIDE; + + CV_WRAP cv::Mat getCameraMatrix() const CV_OVERRIDE + { + return cameraMatrix; + } + CV_WRAP void setCameraMatrix(const cv::Mat &val) CV_OVERRIDE + { + cameraMatrix = val; + } + CV_WRAP double getMinDepth() const + { + return minDepth; + } + CV_WRAP void setMinDepth(double val) + { + minDepth = val; + } + CV_WRAP double getMaxDepth() const + { + return maxDepth; + } + CV_WRAP void setMaxDepth(double val) + { + maxDepth = val; + } + CV_WRAP double getMaxDepthDiff() const + { + return maxDepthDiff; + } + CV_WRAP void setMaxDepthDiff(double val) + { + maxDepthDiff = val; + } + CV_WRAP cv::Mat getIterationCounts() const + { + return iterCounts; + } + CV_WRAP void setIterationCounts(const cv::Mat &val) + { + iterCounts = val; + } + CV_WRAP cv::Mat getMinGradientMagnitudes() const + { + return minGradientMagnitudes; + } + CV_WRAP void setMinGradientMagnitudes(const cv::Mat &val) + { + minGradientMagnitudes = val; + } + CV_WRAP double getMaxPointsPart() const + { + return maxPointsPart; + } + CV_WRAP void setMaxPointsPart(double val) + { + maxPointsPart = val; + } + CV_WRAP int getTransformType() const CV_OVERRIDE + { + return transformType; + } + CV_WRAP void setTransformType(int val) CV_OVERRIDE + { + transformType = val; + } + CV_WRAP double getMaxTranslation() const + { + return maxTranslation; + } + CV_WRAP void setMaxTranslation(double val) + { + maxTranslation = val; + } + CV_WRAP double getMaxRotation() const + { + return maxRotation; + } + CV_WRAP void setMaxRotation(double val) + { + maxRotation = val; + } + + protected: + virtual void + checkParams() const CV_OVERRIDE; + + virtual bool + computeImpl(const Ptr& srcFrame, const Ptr& dstFrame, OutputArray Rt, + const Mat& initRt) const CV_OVERRIDE; + + // Some params have commented desired type. It's due to AlgorithmInfo::addParams does not support it now. + /*float*/ + double minDepth, maxDepth, maxDepthDiff; + /*vector*/ + Mat iterCounts; + /*vector*/ + Mat minGradientMagnitudes; + double maxPointsPart; + + Mat cameraMatrix; + int transformType; + + double maxTranslation, maxRotation; + }; + + /** Odometry based on the paper "KinectFusion: Real-Time Dense Surface Mapping and Tracking", + * Richard A. Newcombe, Andrew Fitzgibbon, at al, SIGGRAPH, 2011. + */ + class CV_EXPORTS_W ICPOdometry: public Odometry + { + public: + ICPOdometry(); + /** Constructor. + * @param cameraMatrix Camera matrix + * @param minDepth Pixels with depth less than minDepth will not be used + * @param maxDepth Pixels with depth larger than maxDepth will not be used + * @param maxDepthDiff Correspondences between pixels of two given frames will be filtered out + * if their depth difference is larger than maxDepthDiff + * @param maxPointsPart The method uses a random pixels subset of size frameWidth x frameHeight x pointsPart + * @param iterCounts Count of iterations on each pyramid level. + * @param transformType Class of trasformation + */ + ICPOdometry(const Mat& cameraMatrix, float minDepth = Odometry::DEFAULT_MIN_DEPTH(), float maxDepth = Odometry::DEFAULT_MAX_DEPTH(), + float maxDepthDiff = Odometry::DEFAULT_MAX_DEPTH_DIFF(), float maxPointsPart = Odometry::DEFAULT_MAX_POINTS_PART(), + const std::vector& iterCounts = std::vector(), int transformType = Odometry::RIGID_BODY_MOTION); + + CV_WRAP static Ptr create(const Mat& cameraMatrix = Mat(), float minDepth = Odometry::DEFAULT_MIN_DEPTH(), float maxDepth = Odometry::DEFAULT_MAX_DEPTH(), + float maxDepthDiff = Odometry::DEFAULT_MAX_DEPTH_DIFF(), float maxPointsPart = Odometry::DEFAULT_MAX_POINTS_PART(), + const std::vector& iterCounts = std::vector(), int transformType = Odometry::RIGID_BODY_MOTION); + + CV_WRAP virtual Size prepareFrameCache(Ptr& frame, int cacheType) const CV_OVERRIDE; + + CV_WRAP cv::Mat getCameraMatrix() const CV_OVERRIDE + { + return cameraMatrix; + } + CV_WRAP void setCameraMatrix(const cv::Mat &val) CV_OVERRIDE + { + cameraMatrix = val; + } + CV_WRAP double getMinDepth() const + { + return minDepth; + } + CV_WRAP void setMinDepth(double val) + { + minDepth = val; + } + CV_WRAP double getMaxDepth() const + { + return maxDepth; + } + CV_WRAP void setMaxDepth(double val) + { + maxDepth = val; + } + CV_WRAP double getMaxDepthDiff() const + { + return maxDepthDiff; + } + CV_WRAP void setMaxDepthDiff(double val) + { + maxDepthDiff = val; + } + CV_WRAP cv::Mat getIterationCounts() const + { + return iterCounts; + } + CV_WRAP void setIterationCounts(const cv::Mat &val) + { + iterCounts = val; + } + CV_WRAP double getMaxPointsPart() const + { + return maxPointsPart; + } + CV_WRAP void setMaxPointsPart(double val) + { + maxPointsPart = val; + } + CV_WRAP int getTransformType() const CV_OVERRIDE + { + return transformType; + } + CV_WRAP void setTransformType(int val) CV_OVERRIDE + { + transformType = val; + } + CV_WRAP double getMaxTranslation() const + { + return maxTranslation; + } + CV_WRAP void setMaxTranslation(double val) + { + maxTranslation = val; + } + CV_WRAP double getMaxRotation() const + { + return maxRotation; + } + CV_WRAP void setMaxRotation(double val) + { + maxRotation = val; + } + CV_WRAP Ptr getNormalsComputer() const + { + return normalsComputer; + } + + protected: + virtual void + checkParams() const CV_OVERRIDE; + + virtual bool + computeImpl(const Ptr& srcFrame, const Ptr& dstFrame, OutputArray Rt, + const Mat& initRt) const CV_OVERRIDE; + + // Some params have commented desired type. It's due to AlgorithmInfo::addParams does not support it now. + /*float*/ + double minDepth, maxDepth, maxDepthDiff; + /*float*/ + double maxPointsPart; + /*vector*/ + Mat iterCounts; + + Mat cameraMatrix; + int transformType; + + double maxTranslation, maxRotation; + + mutable Ptr normalsComputer; + }; + + /** Odometry that merges RgbdOdometry and ICPOdometry by minimize sum of their energy functions. + */ + + class CV_EXPORTS_W RgbdICPOdometry: public Odometry + { + public: + RgbdICPOdometry(); + /** Constructor. + * @param cameraMatrix Camera matrix + * @param minDepth Pixels with depth less than minDepth will not be used + * @param maxDepth Pixels with depth larger than maxDepth will not be used + * @param maxDepthDiff Correspondences between pixels of two given frames will be filtered out + * if their depth difference is larger than maxDepthDiff + * @param maxPointsPart The method uses a random pixels subset of size frameWidth x frameHeight x pointsPart + * @param iterCounts Count of iterations on each pyramid level. + * @param minGradientMagnitudes For each pyramid level the pixels will be filtered out + * if they have gradient magnitude less than minGradientMagnitudes[level]. + * @param transformType Class of trasformation + */ + RgbdICPOdometry(const Mat& cameraMatrix, float minDepth = Odometry::DEFAULT_MIN_DEPTH(), float maxDepth = Odometry::DEFAULT_MAX_DEPTH(), + float maxDepthDiff = Odometry::DEFAULT_MAX_DEPTH_DIFF(), float maxPointsPart = Odometry::DEFAULT_MAX_POINTS_PART(), + const std::vector& iterCounts = std::vector(), + const std::vector& minGradientMagnitudes = std::vector(), + int transformType = Odometry::RIGID_BODY_MOTION); + + CV_WRAP static Ptr create(const Mat& cameraMatrix = Mat(), float minDepth = Odometry::DEFAULT_MIN_DEPTH(), float maxDepth = Odometry::DEFAULT_MAX_DEPTH(), + float maxDepthDiff = Odometry::DEFAULT_MAX_DEPTH_DIFF(), float maxPointsPart = Odometry::DEFAULT_MAX_POINTS_PART(), + const std::vector& iterCounts = std::vector(), + const std::vector& minGradientMagnitudes = std::vector(), + int transformType = Odometry::RIGID_BODY_MOTION); + + CV_WRAP virtual Size prepareFrameCache(Ptr& frame, int cacheType) const CV_OVERRIDE; + + CV_WRAP cv::Mat getCameraMatrix() const CV_OVERRIDE + { + return cameraMatrix; + } + CV_WRAP void setCameraMatrix(const cv::Mat &val) CV_OVERRIDE + { + cameraMatrix = val; + } + CV_WRAP double getMinDepth() const + { + return minDepth; + } + CV_WRAP void setMinDepth(double val) + { + minDepth = val; + } + CV_WRAP double getMaxDepth() const + { + return maxDepth; + } + CV_WRAP void setMaxDepth(double val) + { + maxDepth = val; + } + CV_WRAP double getMaxDepthDiff() const + { + return maxDepthDiff; + } + CV_WRAP void setMaxDepthDiff(double val) + { + maxDepthDiff = val; + } + CV_WRAP double getMaxPointsPart() const + { + return maxPointsPart; + } + CV_WRAP void setMaxPointsPart(double val) + { + maxPointsPart = val; + } + CV_WRAP cv::Mat getIterationCounts() const + { + return iterCounts; + } + CV_WRAP void setIterationCounts(const cv::Mat &val) + { + iterCounts = val; + } + CV_WRAP cv::Mat getMinGradientMagnitudes() const + { + return minGradientMagnitudes; + } + CV_WRAP void setMinGradientMagnitudes(const cv::Mat &val) + { + minGradientMagnitudes = val; + } + CV_WRAP int getTransformType() const CV_OVERRIDE + { + return transformType; + } + CV_WRAP void setTransformType(int val) CV_OVERRIDE + { + transformType = val; + } + CV_WRAP double getMaxTranslation() const + { + return maxTranslation; + } + CV_WRAP void setMaxTranslation(double val) + { + maxTranslation = val; + } + CV_WRAP double getMaxRotation() const + { + return maxRotation; + } + CV_WRAP void setMaxRotation(double val) + { + maxRotation = val; + } + CV_WRAP Ptr getNormalsComputer() const + { + return normalsComputer; + } + + protected: + virtual void + checkParams() const CV_OVERRIDE; + + virtual bool + computeImpl(const Ptr& srcFrame, const Ptr& dstFrame, OutputArray Rt, + const Mat& initRt) const CV_OVERRIDE; + + // Some params have commented desired type. It's due to AlgorithmInfo::addParams does not support it now. + /*float*/ + double minDepth, maxDepth, maxDepthDiff; + /*float*/ + double maxPointsPart; + /*vector*/ + Mat iterCounts; + /*vector*/ + Mat minGradientMagnitudes; + + Mat cameraMatrix; + int transformType; + + double maxTranslation, maxRotation; + + mutable Ptr normalsComputer; + }; + + /** A faster version of ICPOdometry which is used in KinectFusion implementation + * Partial list of differences: + * - Works in parallel + * - Written in universal intrinsics + * - Filters points by angle + * - Interpolates points and normals + * - Doesn't use masks or min/max depth filtering + * - Doesn't use random subsets of points + * - Supports only Rt transform type + * - Supports only 4-float vectors as input type + */ + class CV_EXPORTS_W FastICPOdometry: public Odometry + { + public: + FastICPOdometry(); + /** Constructor. + * @param cameraMatrix Camera matrix + * @param maxDistDiff Correspondences between pixels of two given frames will be filtered out + * if their depth difference is larger than maxDepthDiff + * @param angleThreshold Correspondence will be filtered out + * if an angle between their normals is bigger than threshold + * @param sigmaDepth Depth sigma in meters for bilateral smooth + * @param sigmaSpatial Spatial sigma in pixels for bilateral smooth + * @param kernelSize Kernel size in pixels for bilateral smooth + * @param iterCounts Count of iterations on each pyramid level + */ + FastICPOdometry(const Mat& cameraMatrix, + float maxDistDiff = Odometry::DEFAULT_MAX_DEPTH_DIFF(), + float angleThreshold = (float)(30. * CV_PI / 180.), + float sigmaDepth = 0.04f, + float sigmaSpatial = 4.5f, + int kernelSize = 7, + const std::vector& iterCounts = std::vector()); + + CV_WRAP static Ptr create(const Mat& cameraMatrix, + float maxDistDiff = Odometry::DEFAULT_MAX_DEPTH_DIFF(), + float angleThreshold = (float)(30. * CV_PI / 180.), + float sigmaDepth = 0.04f, + float sigmaSpatial = 4.5f, + int kernelSize = 7, + const std::vector& iterCounts = std::vector()); + + CV_WRAP virtual Size prepareFrameCache(Ptr& frame, int cacheType) const CV_OVERRIDE; + + CV_WRAP cv::Mat getCameraMatrix() const CV_OVERRIDE + { + return cameraMatrix; + } + CV_WRAP void setCameraMatrix(const cv::Mat &val) CV_OVERRIDE + { + cameraMatrix = val; + } + CV_WRAP double getMaxDistDiff() const + { + return maxDistDiff; + } + CV_WRAP void setMaxDistDiff(float val) + { + maxDistDiff = val; + } + CV_WRAP float getAngleThreshold() const + { + return angleThreshold; + } + CV_WRAP void setAngleThreshold(float f) + { + angleThreshold = f; + } + CV_WRAP float getSigmaDepth() const + { + return sigmaDepth; + } + CV_WRAP void setSigmaDepth(float f) + { + sigmaDepth = f; + } + CV_WRAP float getSigmaSpatial() const + { + return sigmaSpatial; + } + CV_WRAP void setSigmaSpatial(float f) + { + sigmaSpatial = f; + } + CV_WRAP int getKernelSize() const + { + return kernelSize; + } + CV_WRAP void setKernelSize(int f) + { + kernelSize = f; + } + CV_WRAP cv::Mat getIterationCounts() const + { + return iterCounts; + } + CV_WRAP void setIterationCounts(const cv::Mat &val) + { + iterCounts = val; + } + CV_WRAP int getTransformType() const CV_OVERRIDE + { + return Odometry::RIGID_BODY_MOTION; + } + CV_WRAP void setTransformType(int val) CV_OVERRIDE + { + if(val != Odometry::RIGID_BODY_MOTION) + throw std::runtime_error("Rigid Body Motion is the only accepted transformation type" + " for this odometry method"); + } + + protected: + virtual void + checkParams() const CV_OVERRIDE; + + virtual bool + computeImpl(const Ptr& srcFrame, const Ptr& dstFrame, OutputArray Rt, + const Mat& initRt) const CV_OVERRIDE; + + // Some params have commented desired type. It's due to AlgorithmInfo::addParams does not support it now. + float maxDistDiff; + + float angleThreshold; + + float sigmaDepth; + + float sigmaSpatial; + + int kernelSize; + + /*vector*/ + Mat iterCounts; + + Mat cameraMatrix; + }; + + /** Warp the image: compute 3d points from the depth, transform them using given transformation, + * then project color point cloud to an image plane. + * This function can be used to visualize results of the Odometry algorithm. + * @param image The image (of CV_8UC1 or CV_8UC3 type) + * @param depth The depth (of type used in depthTo3d fuction) + * @param mask The mask of used pixels (of CV_8UC1), it can be empty + * @param Rt The transformation that will be applied to the 3d points computed from the depth + * @param cameraMatrix Camera matrix + * @param distCoeff Distortion coefficients + * @param warpedImage The warped image. + * @param warpedDepth The warped depth. + * @param warpedMask The warped mask. + */ + CV_EXPORTS_W + void + warpFrame(const Mat& image, const Mat& depth, const Mat& mask, const Mat& Rt, const Mat& cameraMatrix, + const Mat& distCoeff, OutputArray warpedImage, OutputArray warpedDepth = noArray(), OutputArray warpedMask = noArray()); + +// TODO Depth interpolation +// Curvature +// Get rescaleDepth return dubles if asked for + +//! @} + +} /* namespace rgbd */ +} /* namespace cv */ + +#endif + +/* End of file. */ diff --git a/IPL/include/opencv/opencv2/rgbd/dynafu.hpp b/IPL/include/opencv/opencv2/rgbd/dynafu.hpp new file mode 100644 index 0000000..d057ebe --- /dev/null +++ b/IPL/include/opencv/opencv2/rgbd/dynafu.hpp @@ -0,0 +1,206 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +// This code is also subject to the license terms in the LICENSE_KinectFusion.md file found in this module's directory + +#ifndef __OPENCV_RGBD_DYNAFU_HPP__ +#define __OPENCV_RGBD_DYNAFU_HPP__ + +#include "opencv2/core.hpp" +#include "opencv2/core/affine.hpp" + +namespace cv { +namespace dynafu { + +struct CV_EXPORTS_W Params +{ + /** @brief Default parameters + A set of parameters which provides better model quality, can be very slow. + */ + CV_WRAP static Ptr defaultParams(); + + /** @brief Coarse parameters + A set of parameters which provides better speed, can fail to match frames + in case of rapid sensor motion. + */ + CV_WRAP static Ptr coarseParams(); + + /** @brief frame size in pixels */ + CV_PROP_RW Size frameSize; + + /** @brief camera intrinsics */ + CV_PROP Matx33f intr; + + /** @brief pre-scale per 1 meter for input values + + Typical values are: + * 5000 per 1 meter for the 16-bit PNG files of TUM database + * 1000 per 1 meter for Kinect 2 device + * 1 per 1 meter for the 32-bit float images in the ROS bag files + */ + CV_PROP_RW float depthFactor; + + /** @brief Depth sigma in meters for bilateral smooth */ + CV_PROP_RW float bilateral_sigma_depth; + /** @brief Spatial sigma in pixels for bilateral smooth */ + CV_PROP_RW float bilateral_sigma_spatial; + /** @brief Kernel size in pixels for bilateral smooth */ + CV_PROP_RW int bilateral_kernel_size; + + /** @brief Number of pyramid levels for ICP */ + CV_PROP_RW int pyramidLevels; + + /** @brief Resolution of voxel space + + Number of voxels in each dimension. + */ + CV_PROP_RW Vec3i volumeDims; + /** @brief Size of voxel in meters */ + CV_PROP_RW float voxelSize; + + /** @brief Minimal camera movement in meters + + Integrate new depth frame only if camera movement exceeds this value. + */ + CV_PROP_RW float tsdf_min_camera_movement; + + /** @brief initial volume pose in meters */ + Affine3f volumePose; + + /** @brief distance to truncate in meters + + Distances to surface that exceed this value will be truncated to 1.0. + */ + CV_PROP_RW float tsdf_trunc_dist; + + /** @brief max number of frames per voxel + + Each voxel keeps running average of distances no longer than this value. + */ + CV_PROP_RW int tsdf_max_weight; + + /** @brief A length of one raycast step + + How much voxel sizes we skip each raycast step + */ + CV_PROP_RW float raycast_step_factor; + + // gradient delta in voxel sizes + // fixed at 1.0f + // float gradient_delta_factor; + + /** @brief light pose for rendering in meters */ + CV_PROP Vec3f lightPose; + + /** @brief distance theshold for ICP in meters */ + CV_PROP_RW float icpDistThresh; + /** angle threshold for ICP in radians */ + CV_PROP_RW float icpAngleThresh; + /** number of ICP iterations for each pyramid level */ + CV_PROP std::vector icpIterations; + + /** @brief Threshold for depth truncation in meters + + All depth values beyond this threshold will be set to zero + */ + CV_PROP_RW float truncateThreshold; +}; + +/** @brief DynamicFusion implementation + + This class implements a 3d reconstruction algorithm as described in @cite dynamicfusion. + + It takes a sequence of depth images taken from depth sensor + (or any depth images source such as stereo camera matching algorithm or even raymarching renderer). + The output can be obtained as a vector of points and their normals + or can be Phong-rendered from given camera pose. + + It extends the KinectFusion algorithm to handle non-rigidly deforming scenes by maintaining a sparse + set of nodes covering the geometry such that each node contains a warp to transform it from a canonical + space to the live frame. + + An internal representation of a model is a voxel cuboid that keeps TSDF values + which are a sort of distances to the surface (for details read the @cite kinectfusion article about TSDF). + There is no interface to that representation yet. + + Note that DynamicFusion is based on the KinectFusion algorithm which is patented and its use may be + restricted by the list of patents mentioned in README.md file in this module directory. + + That's why you need to set the OPENCV_ENABLE_NONFREE option in CMake to use DynamicFusion. +*/ +class CV_EXPORTS_W DynaFu +{ +public: + CV_WRAP static Ptr create(const Ptr& _params); + virtual ~DynaFu(); + + /** @brief Get current parameters */ + virtual const Params& getParams() const = 0; + + /** @brief Renders a volume into an image + + Renders a 0-surface of TSDF using Phong shading into a CV_8UC4 Mat. + Light pose is fixed in DynaFu params. + + @param image resulting image + @param cameraPose pose of camera to render from. If empty then render from current pose + which is a last frame camera pose. + */ + + CV_WRAP virtual void render(OutputArray image, const Matx44f& cameraPose = Matx44f::eye()) const = 0; + + /** @brief Gets points and normals of current 3d mesh + + The order of normals corresponds to order of points. + The order of points is undefined. + + @param points vector of points which are 4-float vectors + @param normals vector of normals which are 4-float vectors + */ + CV_WRAP virtual void getCloud(OutputArray points, OutputArray normals) const = 0; + + /** @brief Gets points of current 3d mesh + + The order of points is undefined. + + @param points vector of points which are 4-float vectors + */ + CV_WRAP virtual void getPoints(OutputArray points) const = 0; + + /** @brief Calculates normals for given points + @param points input vector of points which are 4-float vectors + @param normals output vector of corresponding normals which are 4-float vectors + */ + CV_WRAP virtual void getNormals(InputArray points, OutputArray normals) const = 0; + + /** @brief Resets the algorithm + + Clears current model and resets a pose. + */ + CV_WRAP virtual void reset() = 0; + + /** @brief Get current pose in voxel space */ + virtual const Affine3f getPose() const = 0; + + /** @brief Process next depth frame + + Integrates depth into voxel space with respect to its ICP-calculated pose. + Input image is converted to CV_32F internally if has another type. + + @param depth one-channel image which size and depth scale is described in algorithm's parameters + @return true if succeeded to align new frame with current scene, false if opposite + */ + CV_WRAP virtual bool update(InputArray depth) = 0; + + virtual std::vector getNodesPos() const = 0; + + virtual void marchCubes(OutputArray vertices, OutputArray edges) const = 0; + + virtual void renderSurface(OutputArray depthImage, OutputArray vertImage, OutputArray normImage, bool warp=true) = 0; +}; + +//! @} +} +} +#endif diff --git a/IPL/include/opencv/opencv2/rgbd/kinfu.hpp b/IPL/include/opencv/opencv2/rgbd/kinfu.hpp new file mode 100644 index 0000000..a2c55bd --- /dev/null +++ b/IPL/include/opencv/opencv2/rgbd/kinfu.hpp @@ -0,0 +1,244 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +// This code is also subject to the license terms in the LICENSE_KinectFusion.md file found in this module's directory + +#ifndef __OPENCV_RGBD_KINFU_HPP__ +#define __OPENCV_RGBD_KINFU_HPP__ + +#include "opencv2/core.hpp" +#include "opencv2/core/affine.hpp" + +namespace cv { +namespace kinfu { +//! @addtogroup kinect_fusion +//! @{ + +struct CV_EXPORTS_W Params +{ + + CV_WRAP Params(){} + + /** + * @brief Constructor for Params + * Sets the initial pose of the TSDF volume. + * @param volumeIntialPoseRot rotation matrix + * @param volumeIntialPoseTransl translation vector + */ + CV_WRAP Params(Matx33f volumeIntialPoseRot, Vec3f volumeIntialPoseTransl) + { + setInitialVolumePose(volumeIntialPoseRot,volumeIntialPoseTransl); + } + + /** + * @brief Constructor for Params + * Sets the initial pose of the TSDF volume. + * @param volumeIntialPose 4 by 4 Homogeneous Transform matrix to set the intial pose of TSDF volume + */ + CV_WRAP Params(Matx44f volumeIntialPose) + { + setInitialVolumePose(volumeIntialPose); + } + + /** + * @brief Set Initial Volume Pose + * Sets the initial pose of the TSDF volume. + * @param R rotation matrix + * @param t translation vector + */ + CV_WRAP void setInitialVolumePose(Matx33f R, Vec3f t); + + /** + * @brief Set Initial Volume Pose + * Sets the initial pose of the TSDF volume. + * @param homogen_tf 4 by 4 Homogeneous Transform matrix to set the intial pose of TSDF volume + */ + CV_WRAP void setInitialVolumePose(Matx44f homogen_tf); + + /** + * @brief Default parameters + * A set of parameters which provides better model quality, can be very slow. + */ + CV_WRAP static Ptr defaultParams(); + + /** @brief Coarse parameters + A set of parameters which provides better speed, can fail to match frames + in case of rapid sensor motion. + */ + CV_WRAP static Ptr coarseParams(); + + /** @brief frame size in pixels */ + CV_PROP_RW Size frameSize; + + /** @brief camera intrinsics */ + CV_PROP_RW Matx33f intr; + + /** @brief pre-scale per 1 meter for input values + + Typical values are: + * 5000 per 1 meter for the 16-bit PNG files of TUM database + * 1000 per 1 meter for Kinect 2 device + * 1 per 1 meter for the 32-bit float images in the ROS bag files + */ + CV_PROP_RW float depthFactor; + + /** @brief Depth sigma in meters for bilateral smooth */ + CV_PROP_RW float bilateral_sigma_depth; + /** @brief Spatial sigma in pixels for bilateral smooth */ + CV_PROP_RW float bilateral_sigma_spatial; + /** @brief Kernel size in pixels for bilateral smooth */ + CV_PROP_RW int bilateral_kernel_size; + + /** @brief Number of pyramid levels for ICP */ + CV_PROP_RW int pyramidLevels; + + /** @brief Resolution of voxel space + + Number of voxels in each dimension. + */ + CV_PROP_RW Vec3i volumeDims; + /** @brief Size of voxel in meters */ + CV_PROP_RW float voxelSize; + + /** @brief Minimal camera movement in meters + + Integrate new depth frame only if camera movement exceeds this value. + */ + CV_PROP_RW float tsdf_min_camera_movement; + + /** @brief initial volume pose in meters */ + Affine3f volumePose; + + /** @brief distance to truncate in meters + + Distances to surface that exceed this value will be truncated to 1.0. + */ + CV_PROP_RW float tsdf_trunc_dist; + + /** @brief max number of frames per voxel + + Each voxel keeps running average of distances no longer than this value. + */ + CV_PROP_RW int tsdf_max_weight; + + /** @brief A length of one raycast step + + How much voxel sizes we skip each raycast step + */ + CV_PROP_RW float raycast_step_factor; + + // gradient delta in voxel sizes + // fixed at 1.0f + // float gradient_delta_factor; + + /** @brief light pose for rendering in meters */ + CV_PROP_RW Vec3f lightPose; + + /** @brief distance theshold for ICP in meters */ + CV_PROP_RW float icpDistThresh; + /** angle threshold for ICP in radians */ + CV_PROP_RW float icpAngleThresh; + /** number of ICP iterations for each pyramid level */ + CV_PROP_RW std::vector icpIterations; + + /** @brief Threshold for depth truncation in meters + + All depth values beyond this threshold will be set to zero + */ + CV_PROP_RW float truncateThreshold; +}; + +/** @brief KinectFusion implementation + + This class implements a 3d reconstruction algorithm described in + @cite kinectfusion paper. + + It takes a sequence of depth images taken from depth sensor + (or any depth images source such as stereo camera matching algorithm or even raymarching renderer). + The output can be obtained as a vector of points and their normals + or can be Phong-rendered from given camera pose. + + An internal representation of a model is a voxel cuboid that keeps TSDF values + which are a sort of distances to the surface (for details read the @cite kinectfusion article about TSDF). + There is no interface to that representation yet. + + KinFu uses OpenCL acceleration automatically if available. + To enable or disable it explicitly use cv::setUseOptimized() or cv::ocl::setUseOpenCL(). + + This implementation is based on [kinfu-remake](https://github.com/Nerei/kinfu_remake). + + Note that the KinectFusion algorithm was patented and its use may be restricted by + the list of patents mentioned in README.md file in this module directory. + + That's why you need to set the OPENCV_ENABLE_NONFREE option in CMake to use KinectFusion. +*/ +class CV_EXPORTS_W KinFu +{ +public: + CV_WRAP static Ptr create(const Ptr& _params); + virtual ~KinFu(); + + /** @brief Get current parameters */ + virtual const Params& getParams() const = 0; + + /** @brief Renders a volume into an image + + Renders a 0-surface of TSDF using Phong shading into a CV_8UC4 Mat. + Light pose is fixed in KinFu params. + + @param image resulting image + @param cameraPose pose of camera to render from. If empty then render from current pose + which is a last frame camera pose. + */ + + CV_WRAP virtual void render(OutputArray image, const Matx44f& cameraPose = Matx44f::eye()) const = 0; + + /** @brief Gets points and normals of current 3d mesh + + The order of normals corresponds to order of points. + The order of points is undefined. + + @param points vector of points which are 4-float vectors + @param normals vector of normals which are 4-float vectors + */ + CV_WRAP virtual void getCloud(OutputArray points, OutputArray normals) const = 0; + + /** @brief Gets points of current 3d mesh + + The order of points is undefined. + + @param points vector of points which are 4-float vectors + */ + CV_WRAP virtual void getPoints(OutputArray points) const = 0; + + /** @brief Calculates normals for given points + @param points input vector of points which are 4-float vectors + @param normals output vector of corresponding normals which are 4-float vectors + */ + CV_WRAP virtual void getNormals(InputArray points, OutputArray normals) const = 0; + + /** @brief Resets the algorithm + + Clears current model and resets a pose. + */ + CV_WRAP virtual void reset() = 0; + + /** @brief Get current pose in voxel space */ + virtual const Affine3f getPose() const = 0; + + /** @brief Process next depth frame + + Integrates depth into voxel space with respect to its ICP-calculated pose. + Input image is converted to CV_32F internally if has another type. + + @param depth one-channel image which size and depth scale is described in algorithm's parameters + @return true if succeeded to align new frame with current scene, false if opposite + */ + CV_WRAP virtual bool update(InputArray depth) = 0; +}; + +//! @} +} +} +#endif diff --git a/IPL/include/opencv/opencv2/rgbd/linemod.hpp b/IPL/include/opencv/opencv2/rgbd/linemod.hpp index ac56291..76b61bf 100644 --- a/IPL/include/opencv/opencv2/rgbd/linemod.hpp +++ b/IPL/include/opencv/opencv2/rgbd/linemod.hpp @@ -1,48 +1,11 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_OBJDETECT_LINEMOD_HPP__ -#define __OPENCV_OBJDETECT_LINEMOD_HPP__ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +// This code is also subject to the license terms in the LICENSE_WillowGarage.md file found in this module's directory + +#ifndef __OPENCV_RGBD_LINEMOD_HPP__ +#define __OPENCV_RGBD_LINEMOD_HPP__ #include "opencv2/core.hpp" #include @@ -60,14 +23,14 @@ namespace linemod { /** * \brief Discriminant feature described by its location and label. */ -struct CV_EXPORTS Feature +struct CV_EXPORTS_W_SIMPLE Feature { - int x; ///< x offset - int y; ///< y offset - int label; ///< Quantization + CV_PROP_RW int x; ///< x offset + CV_PROP_RW int y; ///< y offset + CV_PROP_RW int label; ///< Quantization - Feature() : x(0), y(0), label(0) {} - Feature(int x, int y, int label); + CV_WRAP Feature() : x(0), y(0), label(0) {} + CV_WRAP Feature(int x, int y, int label); void read(const FileNode& fn); void write(FileStorage& fs) const; @@ -75,12 +38,12 @@ struct CV_EXPORTS Feature inline Feature::Feature(int _x, int _y, int _label) : x(_x), y(_y), label(_label) {} -struct CV_EXPORTS Template +struct CV_EXPORTS_W_SIMPLE Template { - int width; - int height; - int pyramid_level; - std::vector features; + CV_PROP int width; + CV_PROP int height; + CV_PROP int pyramid_level; + CV_PROP std::vector features; void read(const FileNode& fn); void write(FileStorage& fs) const; @@ -89,7 +52,7 @@ struct CV_EXPORTS Template /** * \brief Represents a modality operating over an image pyramid. */ -class QuantizedPyramid +class CV_EXPORTS_W QuantizedPyramid { public: // Virtual destructor @@ -101,21 +64,21 @@ class QuantizedPyramid * \param[out] dst The destination 8-bit image. For each pixel at most one bit is set, * representing its classification. */ - virtual void quantize(Mat& dst) const =0; + CV_WRAP virtual void quantize(CV_OUT Mat& dst) const =0; /** * \brief Extract most discriminant features at current pyramid level to form a new template. * * \param[out] templ The new template. */ - virtual bool extractTemplate(Template& templ) const =0; + CV_WRAP virtual bool extractTemplate(CV_OUT Template& templ) const =0; /** * \brief Go to the next pyramid level. * * \todo Allow pyramid scale factor other than 2 */ - virtual void pyrDown() =0; + CV_WRAP virtual void pyrDown() =0; protected: /// Candidate feature with a score @@ -153,7 +116,7 @@ inline QuantizedPyramid::Candidate::Candidate(int x, int y, int label, float _sc * * \todo Max response, to allow optimization of summing (255/MAX) features as uint8 */ -class CV_EXPORTS Modality +class CV_EXPORTS_W Modality { public: // Virtual destructor @@ -166,15 +129,15 @@ class CV_EXPORTS Modality * \param[in] mask Optional mask. If not empty, unmasked pixels are set to zero * in quantized image and cannot be extracted as features. */ - Ptr process(const Mat& src, + CV_WRAP Ptr process(const Mat& src, const Mat& mask = Mat()) const { return processImpl(src, mask); } - virtual String name() const =0; + CV_WRAP virtual String name() const =0; - virtual void read(const FileNode& fn) =0; + CV_WRAP virtual void read(const FileNode& fn) =0; virtual void write(FileStorage& fs) const =0; /** @@ -184,12 +147,12 @@ class CV_EXPORTS Modality * - "ColorGradient" * - "DepthNormal" */ - static Ptr create(const String& modality_type); + CV_WRAP static Ptr create(const String& modality_type); /** * \brief Load a modality from file. */ - static Ptr create(const FileNode& fn); + CV_WRAP static Ptr create(const FileNode& fn); protected: // Indirection is because process() has a default parameter. @@ -200,7 +163,7 @@ class CV_EXPORTS Modality /** * \brief Modality that computes quantized gradient orientations from a color image. */ -class CV_EXPORTS ColorGradient : public Modality +class CV_EXPORTS_W ColorGradient : public Modality { public: /** @@ -218,24 +181,26 @@ class CV_EXPORTS ColorGradient : public Modality */ ColorGradient(float weak_threshold, size_t num_features, float strong_threshold); - virtual String name() const; + CV_WRAP static Ptr create(float weak_threshold, size_t num_features, float strong_threshold); - virtual void read(const FileNode& fn); - virtual void write(FileStorage& fs) const; + virtual String name() const CV_OVERRIDE; - float weak_threshold; - size_t num_features; - float strong_threshold; + virtual void read(const FileNode& fn) CV_OVERRIDE; + virtual void write(FileStorage& fs) const CV_OVERRIDE; + + CV_PROP float weak_threshold; + CV_PROP size_t num_features; + CV_PROP float strong_threshold; protected: virtual Ptr processImpl(const Mat& src, - const Mat& mask) const; + const Mat& mask) const CV_OVERRIDE; }; /** * \brief Modality that computes quantized surface normals from a dense depth map. */ -class CV_EXPORTS DepthNormal : public Modality +class CV_EXPORTS_W DepthNormal : public Modality { public: /** @@ -256,36 +221,48 @@ class CV_EXPORTS DepthNormal : public Modality DepthNormal(int distance_threshold, int difference_threshold, size_t num_features, int extract_threshold); - virtual String name() const; + CV_WRAP static Ptr create(int distance_threshold, int difference_threshold, + size_t num_features, int extract_threshold); + + virtual String name() const CV_OVERRIDE; - virtual void read(const FileNode& fn); - virtual void write(FileStorage& fs) const; + virtual void read(const FileNode& fn) CV_OVERRIDE; + virtual void write(FileStorage& fs) const CV_OVERRIDE; - int distance_threshold; - int difference_threshold; - size_t num_features; - int extract_threshold; + CV_PROP int distance_threshold; + CV_PROP int difference_threshold; + CV_PROP size_t num_features; + CV_PROP int extract_threshold; protected: virtual Ptr processImpl(const Mat& src, - const Mat& mask) const; + const Mat& mask) const CV_OVERRIDE; }; /** * \brief Debug function to colormap a quantized image for viewing. */ -void colormap(const Mat& quantized, Mat& dst); +CV_EXPORTS_W void colormap(const Mat& quantized, CV_OUT Mat& dst); + +/** + * \brief Debug function to draw linemod features + * @param img + * @param templates see @ref Detector::addTemplate + * @param tl template bbox top-left offset see @ref Detector::addTemplate + * @param size marker size see @ref cv::drawMarker + */ +CV_EXPORTS_W void drawFeatures(InputOutputArray img, const std::vector